Coverage for integrations / service_tools / hf_model_resolver.py: 0.0%

197 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2HF Model Resolver — find and download the best GGUF quantization from HuggingFace. 

3 

4Prefers Unsloth quantizations (fastest fine-tuning tool, best GGUF exports). 

5Auto-selects quantization level based on available VRAM. 

6 

7Usage: 

8 resolver = HFModelResolver() 

9 path = resolver.resolve("Qwen/Qwen3-8B") # Returns local GGUF path 

10 # Internally: finds unsloth/Qwen3-8B-GGUF, picks Q4_K_M for 8GB GPU, downloads 

11""" 

12 

13import logging 

14import re 

15import threading 

16from pathlib import Path 

17from typing import Dict, List, Optional, Tuple 

18 

19logger = logging.getLogger(__name__) 

20 

21# ── Quantization constants ─────────────────────────────────────────── 

22 

23# Quantization preference order (best quality first) 

24QUANT_PREFERENCE = [ 

25 'Q8_0', 'Q6_K_L', 'Q6_K', 'Q5_K_M', 'Q5_K_S', 

26 'Q4_K_L', 'Q4_K_M', 'Q4_K_S', 'IQ4_XS', 'Q4_0', 

27 'IQ3_M', 'IQ2_M', 'Q2_K', 

28] 

29 

30# VRAM thresholds for auto-selection: (min_free_vram_gb, target_quant) 

31VRAM_QUANT_MAP: List[Tuple[float, str]] = [ 

32 (24.0, 'Q8_0'), 

33 (16.0, 'Q6_K'), 

34 (8.0, 'Q4_K_M'), 

35 (4.0, 'Q4_K_S'), 

36 (0.0, 'Q4_0'), 

37] 

38 

39# Regex to extract quant label from GGUF filenames. 

40# Matches patterns like: Q4_K_M, Q8_0, IQ4_XS, Q6_K_L, Q2_K, etc. 

41_QUANT_RE = re.compile( 

42 r'(?:^|[._-])' 

43 r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)' 

44 r'(?:[._-]|$)', 

45 re.IGNORECASE, 

46) 

47 

48 

49def _extract_quant(filename: str) -> Optional[str]: 

50 """Extract quantization label from a GGUF filename. 

51 

52 Returns the quant string in upper-case (e.g. 'Q4_K_M') or None. 

53 """ 

54 m = _QUANT_RE.search(filename) 

55 if m: 

56 return m.group(1).upper() 

57 return None 

58 

59 

60def _quant_rank(quant: str) -> int: 

61 """Return rank of a quant in QUANT_PREFERENCE (lower = better quality). 

62 

63 Unknown quants get a high rank so known ones are preferred. 

64 """ 

65 try: 

66 return QUANT_PREFERENCE.index(quant) 

67 except ValueError: 

68 return len(QUANT_PREFERENCE) + 1 

69 

70 

class HFModelResolver:
    """Resolve HuggingFace model names to local GGUF file paths.

    Search strategy:
        1. Unsloth GGUF repos (preferred — best GGUF exports)
        2. Original org GGUF repos
        3. bartowski GGUF repos (popular community uploader)
        4. Original repo (may contain GGUF files directly)

    Auto-selects quantization based on available VRAM via vram_manager.
    Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/.
    """

    def __init__(self):
        # Serializes downloads so only one large transfer runs at a time.
        self._download_lock = threading.Lock()
        self._storage = None  # lazy ModelStorageManager
        self._hf_api = None  # lazy HfApi

    # ── Lazy accessors ─────────────────────────────────────────

    def _get_storage(self):
        """Lazy-load ModelStorageManager to avoid import cycles."""
        if self._storage is None:
            from .model_storage import ModelStorageManager
            self._storage = ModelStorageManager()
        return self._storage

    def _get_hf_api(self):
        """Lazy-load HfApi. Raises ImportError if huggingface_hub missing."""
        if self._hf_api is None:
            from huggingface_hub import HfApi
            self._hf_api = HfApi()
        return self._hf_api

    def _get_gpu_info(self) -> Dict:
        """Get GPU info from vram_manager singleton.

        Returns:
            Dict with 'cuda_available', 'total_gb', 'free_gb' and 'name'
            keys; a CPU-only stub when detection is unavailable.
        """
        try:
            from .vram_manager import vram_manager
            return vram_manager.detect_gpu()
        except Exception as e:
            logger.debug(f"GPU detection unavailable: {e}")
            return {
                'cuda_available': False,
                'total_gb': 0.0,
                'free_gb': 0.0,
                'name': None,
            }

    # ── Candidate repos ────────────────────────────────────────

    @staticmethod
    def _candidate_repos(model_name: str) -> List[str]:
        """Build the ordered list of candidate GGUF repos for a model.

        Order: unsloth GGUF, original-org GGUF, bartowski GGUF, then the
        original repo itself (it may ship .gguf files directly).

        Shared by find_gguf_repo() and list_available() so both search
        the same repos in the same order (previously list_available
        omitted the bare-basename fallback for org-less names).
        """
        if '/' in model_name:
            org, basename = model_name.split('/', 1)
        else:
            org = None
            basename = model_name

        candidates = [f"unsloth/{basename}-GGUF"]
        if org:
            candidates.append(f"{org}/{basename}-GGUF")
        candidates.append(f"bartowski/{basename}-GGUF")
        # Original repo as last resort
        candidates.append(f"{org}/{basename}" if org else basename)
        return candidates

    # ── Main entry point ───────────────────────────────────────

    def resolve(self, model_name: str, quant: str = 'auto') -> Path:
        """Resolve a HF model name to a local GGUF file path.

        Args:
            model_name: HuggingFace model identifier, e.g. "Qwen/Qwen3-8B"
                or "meta-llama/Llama-3.1-8B".
            quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'
                to pick based on available VRAM.

        Returns:
            Path to the downloaded GGUF file on disk.

        Raises:
            FileNotFoundError: If no GGUF repo could be found.
            RuntimeError: If download fails.
            ImportError: If huggingface_hub is not installed.
        """
        logger.info(f"Resolving GGUF for {model_name} (quant={quant})")

        # Step 1: find a repo that has GGUF files
        repo_id = self.find_gguf_repo(model_name)
        logger.info(f"Found GGUF repo: {repo_id}")

        # Step 2: pick quantization
        filename = self.select_quantization(repo_id, quant)
        # Fix: interpolate the selected filename (was a literal "(unknown)").
        logger.info(f"Selected quantization file: {filename}")

        # Step 3: download if needed
        local_path = self.download(repo_id, filename)
        logger.info(f"GGUF ready at: {local_path}")

        return local_path

    # ── Repo discovery ─────────────────────────────────────────

    def find_gguf_repo(self, model_name: str) -> str:
        """Search for a GGUF repo for the given model.

        Search order (prefers Unsloth):
            1. unsloth/{basename}-GGUF
            2. {org}/{model}-GGUF
            3. bartowski/{basename}-GGUF
            4. {org}/{model} (original repo, check for .gguf files)

        Args:
            model_name: e.g. "Qwen/Qwen3-8B" or "meta-llama/Llama-3.1-8B"

        Returns:
            The repo_id string (e.g. "unsloth/Qwen3-8B-GGUF").

        Raises:
            FileNotFoundError: If no repo with GGUF files is found.
            ImportError: If huggingface_hub is not installed.
        """
        candidates = self._candidate_repos(model_name)

        for repo_id in candidates:
            gguf_files = self._list_gguf_files(repo_id)
            if gguf_files:
                logger.info(
                    f"Found {len(gguf_files)} GGUF file(s) in {repo_id}"
                )
                return repo_id
            logger.debug(f"No GGUF files in {repo_id}")

        raise FileNotFoundError(
            f"No GGUF repository found for '{model_name}'. "
            f"Searched: {', '.join(candidates)}"
        )

    def _list_gguf_files(self, repo_id: str) -> List[str]:
        """List .gguf files in a HuggingFace repo.

        Returns an empty list if the repo does not exist or has no GGUF
        files. ImportError (huggingface_hub missing) is re-raised so the
        caller gets an actionable error rather than a silent miss.
        """
        try:
            api = self._get_hf_api()
            all_files = api.list_repo_files(repo_id)
            return [f for f in all_files if f.lower().endswith('.gguf')]
        except ImportError:
            raise
        except Exception as e:
            # Repo not found (404), rate limited, network error, etc.
            logger.debug(f"Could not list files in {repo_id}: {e}")
            return []

    # ── Quantization selection ─────────────────────────────────

    def select_quantization(self, repo_id: str, quant: str = 'auto') -> str:
        """Select the best GGUF file from a repo.

        If quant='auto', selects based on available VRAM:
            >= 24GB free: Q8_0
            >= 16GB free: Q6_K
            >= 8GB free: Q4_K_M
            >= 4GB free: Q4_K_S
            CPU only: Q4_0

        If a specific quant is requested (e.g. 'Q4_K_M'), finds the closest
        available file.

        Args:
            repo_id: HuggingFace repo containing GGUF files.
            quant: 'auto' or a specific quant label.

        Returns:
            Filename of the selected GGUF file.

        Raises:
            FileNotFoundError: If no suitable GGUF file is found.
        """
        gguf_files = self._list_gguf_files(repo_id)
        if not gguf_files:
            raise FileNotFoundError(
                f"No GGUF files found in {repo_id}"
            )

        # Build a map of quant_label -> filename
        quant_map: Dict[str, str] = {}
        for fname in gguf_files:
            label = _extract_quant(fname)
            if label:
                # If multiple files have the same quant, prefer the shorter
                # filename (single-file over split shards).
                if label not in quant_map or len(fname) < len(quant_map[label]):
                    quant_map[label] = fname

        if not quant_map:
            # No recognizable quant labels — return the first GGUF file
            logger.warning(
                f"No quant labels recognized in {repo_id}; "
                f"returning first GGUF file: {gguf_files[0]}"
            )
            return gguf_files[0]

        # Determine target quant
        if quant == 'auto':
            target = self._auto_select_quant()
            logger.info(f"Auto-selected target quant: {target}")
        else:
            target = quant.upper()

        # Exact match
        if target in quant_map:
            return quant_map[target]

        # No exact match: pick the available quant whose rank in
        # QUANT_PREFERENCE is closest to the target's rank.
        target_rank = _quant_rank(target)
        available = sorted(quant_map.keys(), key=_quant_rank)

        best_file = None
        best_distance = float('inf')
        for q in available:
            distance = abs(_quant_rank(q) - target_rank)
            if distance < best_distance:
                best_distance = distance
                best_file = quant_map[q]
                if distance == 0:
                    break  # exact match

        if best_file is None:
            # Should not happen (quant_map is non-empty) but be safe
            best_file = next(iter(quant_map.values()))

        logger.info(
            f"Requested {target}, best available: "
            f"{_extract_quant(best_file)} -> {best_file}"
        )
        return best_file

    def _auto_select_quant(self) -> str:
        """Pick a quant target based on current free VRAM."""
        gpu_info = self._get_gpu_info()
        free_gb = gpu_info.get('free_gb', 0.0)

        if not gpu_info.get('cuda_available', False):
            logger.info("No GPU detected, targeting CPU-friendly Q4_0")
            return 'Q4_0'

        # VRAM_QUANT_MAP is ordered high-to-low, so the first threshold
        # we clear is the best quant we can afford.
        for threshold, quant in VRAM_QUANT_MAP:
            if free_gb >= threshold:
                logger.info(
                    f"Free VRAM: {free_gb:.1f} GB >= {threshold} GB, "
                    f"targeting {quant}"
                )
                return quant

        # Fallback (should not reach here since 0.0 is in the map)
        return 'Q4_0'

    @staticmethod
    def _validate_gguf(path: Path) -> bool:
        """Check GGUF magic bytes (b'GGUF') at file start.

        Returns False for unreadable, empty, or non-GGUF files.
        """
        try:
            with open(path, 'rb') as f:
                magic = f.read(4)
            return magic == b'GGUF'
        except Exception:
            return False

    # ── Download ───────────────────────────────────────────────

    def download(self, repo_id: str, filename: str) -> Path:
        """Download a GGUF file from HuggingFace.

        Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/{filename}.
        Thread-safe: only one download runs at a time.
        Skips download if the file already exists, has non-zero size,
        and passes the GGUF magic-bytes check; corrupt/partial files
        are deleted and re-downloaded.
        Updates the ModelStorageManager manifest on success (best-effort).

        Args:
            repo_id: HuggingFace repo, e.g. "unsloth/Qwen3-8B-GGUF".
            filename: GGUF filename within the repo.

        Returns:
            Path to the local GGUF file.

        Raises:
            RuntimeError: If the download fails.
            ImportError: If huggingface_hub is not installed.
        """
        # Build local path
        repo_safe = repo_id.replace('/', '--')
        gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf' / repo_safe
        local_path = gguf_dir / filename

        # Skip if already downloaded AND valid GGUF (magic bytes check)
        if local_path.exists() and local_path.stat().st_size > 0:
            if self._validate_gguf(local_path):
                logger.info(f"Already downloaded: {local_path}")
                return local_path
            else:
                logger.warning(f"Corrupt/partial GGUF detected, re-downloading: {local_path}")
                local_path.unlink(missing_ok=True)

        with self._download_lock:
            # Double-check after acquiring lock: another thread may have
            # completed this exact download while we waited.
            if local_path.exists() and local_path.stat().st_size > 0:
                if self._validate_gguf(local_path):
                    logger.info(f"Already downloaded (post-lock): {local_path}")
                    return local_path
                else:
                    local_path.unlink(missing_ok=True)

            gguf_dir.mkdir(parents=True, exist_ok=True)

            # Fix: interpolate the filename (was a literal "(unknown)").
            logger.info(
                f"Downloading {filename} from {repo_id} "
                f"to {gguf_dir}..."
            )

            try:
                from huggingface_hub import hf_hub_download

                downloaded_path = hf_hub_download(
                    repo_id=repo_id,
                    filename=filename,
                    local_dir=str(gguf_dir),
                    # NOTE(review): deprecated and ignored in
                    # huggingface_hub >= 0.23; kept for older versions.
                    local_dir_use_symlinks=False,
                )
                downloaded_path = Path(downloaded_path)

                # hf_hub_download may place the file in a subfolder or
                # directly in local_dir — ensure we return the right path.
                if downloaded_path.exists():
                    actual_path = downloaded_path
                elif local_path.exists():
                    actual_path = local_path
                else:
                    raise RuntimeError(
                        f"Download completed but file not found at "
                        f"{downloaded_path} or {local_path}"
                    )

                size_bytes = actual_path.stat().st_size
                size_gb = size_bytes / (1024 ** 3)
                logger.info(
                    f"Download complete: {actual_path.name} "
                    f"({size_gb:.2f} GB)"
                )

                # Update manifest (best-effort; failure is non-fatal)
                try:
                    storage = self._get_storage()
                    tool_name = f"gguf/{repo_safe}"
                    storage.mark_downloaded(
                        tool_name,
                        # Fix: record the real filename in the manifest
                        # (was a literal "(unknown)").
                        source_url=f"hf://{repo_id}/{filename}",
                        size_bytes=size_bytes,
                    )
                except Exception as e:
                    logger.warning(f"Manifest update failed: {e}")

                return actual_path

            except ImportError:
                raise ImportError(
                    "huggingface_hub is required for GGUF downloads. "
                    "Install it with: pip install huggingface_hub"
                )
            except Exception as e:
                # Fix: interpolate the filename (was a literal "(unknown)").
                logger.error(f"Download failed for {repo_id}/{filename}: {e}")
                raise RuntimeError(
                    f"Failed to download {filename} from {repo_id}: {e}"
                ) from e

    # ── Listing ────────────────────────────────────────────────

    def list_available(self, model_name: str) -> List[Dict]:
        """List all available GGUF files for a model.

        Searches all candidate repos (Unsloth, original, bartowski) and
        returns a consolidated list of available files.

        Args:
            model_name: e.g. "Qwen/Qwen3-8B"

        Returns:
            List of dicts with keys:
                - repo_id: str
                - filename: str
                - quant: str or None
                - quant_rank: int (lower = better quality)
                - size_bytes: int or None (if available from API)
        """
        candidates = self._candidate_repos(model_name)

        results: List[Dict] = []
        seen_files = set()

        for repo_id in candidates:
            try:
                api = self._get_hf_api()
                repo_info = api.list_repo_tree(repo_id)
                for item in repo_info:
                    # item is a RepoFile or RepoFolder
                    fname = getattr(item, 'rfilename', None)
                    if fname is None:
                        # Might be a RepoFolder or different API object
                        fname = getattr(item, 'path', None)
                    if not fname or not fname.lower().endswith('.gguf'):
                        continue
                    # Deduplicate by repo-qualified filename
                    key = f"{repo_id}/{fname}"
                    if key in seen_files:
                        continue
                    seen_files.add(key)

                    quant = _extract_quant(fname)
                    size = getattr(item, 'size', None)

                    results.append({
                        'repo_id': repo_id,
                        'filename': fname,
                        'quant': quant,
                        'quant_rank': _quant_rank(quant) if quant else 999,
                        'size_bytes': size,
                    })
            except ImportError:
                raise
            except Exception as e:
                logger.debug(f"Could not list {repo_id}: {e}")

        # Sort by quant quality (best first)
        results.sort(key=lambda r: r['quant_rank'])
        return results

517 

518 

519# ── Singleton ──────────────────────────────────────────────────────── 

520 

_resolver: Optional[HFModelResolver] = None
_resolver_lock = threading.Lock()


def get_resolver() -> HFModelResolver:
    """Return the process-wide HFModelResolver, creating it on first use."""
    global _resolver
    # Fast path: already created, no locking needed.
    if _resolver is not None:
        return _resolver
    with _resolver_lock:
        # Re-check under the lock: another thread may have won the race.
        if _resolver is None:
            _resolver = HFModelResolver()
    return _resolver
532 return _resolver