Coverage for integrations/service_tools/hf_model_resolver.py: 0.0%
197 statements
1"""
2HF Model Resolver — find and download the best GGUF quantization from HuggingFace.
4Prefers Unsloth quantizations (fastest fine-tuning tool, best GGUF exports).
5Auto-selects quantization level based on available VRAM.
7Usage:
8 resolver = HFModelResolver()
9 path = resolver.resolve("Qwen/Qwen3-8B") # Returns local GGUF path
10 # Internally: finds unsloth/Qwen3-8B-GGUF, picks Q4_K_M for 8GB GPU, downloads
11"""

import logging
import re
import threading
from pathlib import Path
from typing import Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

# ── Quantization constants ───────────────────────────────────────────

# Quantization preference order (best quality first)
QUANT_PREFERENCE = [
    'Q8_0', 'Q6_K_L', 'Q6_K', 'Q5_K_M', 'Q5_K_S',
    'Q4_K_L', 'Q4_K_M', 'Q4_K_S', 'IQ4_XS', 'Q4_0',
    'IQ3_M', 'IQ2_M', 'Q2_K',
]

# VRAM thresholds for auto-selection: (min_free_vram_gb, target_quant)
VRAM_QUANT_MAP: List[Tuple[float, str]] = [
    (24.0, 'Q8_0'),
    (16.0, 'Q6_K'),
    (8.0, 'Q4_K_M'),
    (4.0, 'Q4_K_S'),
    (0.0, 'Q4_0'),
]

# Regex to extract quant label from GGUF filenames.
# Matches patterns like: Q4_K_M, Q8_0, IQ4_XS, Q6_K_L, Q2_K, etc.
_QUANT_RE = re.compile(
    r'(?:^|[._-])'
    r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)'
    r'(?:[._-]|$)',
    re.IGNORECASE,
)


def _extract_quant(filename: str) -> Optional[str]:
    """Extract quantization label from a GGUF filename.

    Returns the quant string in upper-case (e.g. 'Q4_K_M') or None.
    """
    m = _QUANT_RE.search(filename)
    if m:
        return m.group(1).upper()
    return None


def _quant_rank(quant: str) -> int:
    """Return rank of a quant in QUANT_PREFERENCE (lower = better quality).

    Unknown quants get a high rank so known ones are preferred.
    """
    try:
        return QUANT_PREFERENCE.index(quant)
    except ValueError:
        return len(QUANT_PREFERENCE) + 1
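

# A quick illustration of the two helpers above, on hypothetical filenames
# (not tied to any real repo):
#
#     _extract_quant("Qwen3-8B-Q4_K_M.gguf")  -> 'Q4_K_M'
#     _extract_quant("model.IQ4_XS.gguf")     -> 'IQ4_XS'
#     _extract_quant("model-f16.gguf")        -> None  (no quant label)
#     _quant_rank('Q8_0')    -> 0   (best quality)
#     _quant_rank('Q4_K_M')  -> 6
#     _quant_rank('WEIRD')   -> 14  (len(QUANT_PREFERENCE) + 1)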


class HFModelResolver:
    """Resolve HuggingFace model names to local GGUF file paths.

    Search strategy:
    1. Unsloth GGUF repos (preferred)
    2. Original org GGUF repos
    3. bartowski GGUF repos (popular community uploader)
    4. Original repo (may contain GGUF files directly)

    Auto-selects quantization based on available VRAM via vram_manager.
    Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/.
    """

    def __init__(self):
        self._download_lock = threading.Lock()
        self._storage = None  # lazy
        self._hf_api = None  # lazy

    # ── Lazy accessors ───────────────────────────────────────────

    def _get_storage(self):
        """Lazy-load ModelStorageManager to avoid import cycles."""
        if self._storage is None:
            from .model_storage import ModelStorageManager
            self._storage = ModelStorageManager()
        return self._storage

    def _get_hf_api(self):
        """Lazy-load HfApi. Raises ImportError if huggingface_hub is missing."""
        if self._hf_api is None:
            from huggingface_hub import HfApi
            self._hf_api = HfApi()
        return self._hf_api

    def _get_gpu_info(self) -> Dict:
        """Get GPU info from the vram_manager singleton."""
        try:
            from .vram_manager import vram_manager
            return vram_manager.detect_gpu()
        except Exception as e:
            logger.debug(f"GPU detection unavailable: {e}")
            return {
                'cuda_available': False,
                'total_gb': 0.0,
                'free_gb': 0.0,
                'name': None,
            }

    # ── Main entry point ─────────────────────────────────────────

    def resolve(self, model_name: str, quant: str = 'auto') -> Path:
        """Resolve a HF model name to a local GGUF file path.

        Args:
            model_name: HuggingFace model identifier, e.g. "Qwen/Qwen3-8B"
                or "meta-llama/Llama-3.1-8B".
            quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'
                to pick based on available VRAM.

        Returns:
            Path to the downloaded GGUF file on disk.

        Raises:
            FileNotFoundError: If no GGUF repo could be found.
            RuntimeError: If download fails.
            ImportError: If huggingface_hub is not installed.
        """
        logger.info(f"Resolving GGUF for {model_name} (quant={quant})")

        # Step 1: find a repo that has GGUF files
        repo_id = self.find_gguf_repo(model_name)
        logger.info(f"Found GGUF repo: {repo_id}")

        # Step 2: pick quantization
        filename = self.select_quantization(repo_id, quant)
        logger.info(f"Selected quantization file: {filename}")

        # Step 3: download if needed
        local_path = self.download(repo_id, filename)
        logger.info(f"GGUF ready at: {local_path}")

        return local_path
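
    # Example round trip (needs network access; the exact repo and file
    # chosen depend on what exists on the Hub at the time):
    #
    #     resolver = HFModelResolver()
    #     path = resolver.resolve("Qwen/Qwen3-8B", quant="Q4_K_M")
    #     # e.g. ~/.hevolve/models/gguf/unsloth--Qwen3-8B-GGUF/<file>.gguf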

    # ── Repo discovery ───────────────────────────────────────────

    def find_gguf_repo(self, model_name: str) -> str:
        """Search for a GGUF repo for the given model.

        Search order (prefers Unsloth):
        1. unsloth/{basename}-GGUF
        2. {org}/{basename}-GGUF
        3. bartowski/{basename}-GGUF
        4. {org}/{basename} (original repo, check for .gguf files)

        Args:
            model_name: e.g. "Qwen/Qwen3-8B" or "meta-llama/Llama-3.1-8B"

        Returns:
            The repo_id string (e.g. "unsloth/Qwen3-8B-GGUF").

        Raises:
            FileNotFoundError: If no repo with GGUF files is found.
            ImportError: If huggingface_hub is not installed.
        """
        # Parse org/basename
        if '/' in model_name:
            org, basename = model_name.split('/', 1)
        else:
            org = None
            basename = model_name

        candidates = [
            f"unsloth/{basename}-GGUF",
        ]
        if org:
            candidates.append(f"{org}/{basename}-GGUF")
        candidates.append(f"bartowski/{basename}-GGUF")
        # Original repo as last resort
        if org:
            candidates.append(f"{org}/{basename}")
        else:
            candidates.append(basename)

        for repo_id in candidates:
            gguf_files = self._list_gguf_files(repo_id)
            if gguf_files:
                logger.info(
                    f"Found {len(gguf_files)} GGUF file(s) in {repo_id}"
                )
                return repo_id
            logger.debug(f"No GGUF files in {repo_id}")

        raise FileNotFoundError(
            f"No GGUF repository found for '{model_name}'. "
            f"Searched: {', '.join(candidates)}"
        )
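
    # For example, "Qwen/Qwen3-8B" expands to the candidate list
    # ["unsloth/Qwen3-8B-GGUF", "Qwen/Qwen3-8B-GGUF",
    #  "bartowski/Qwen3-8B-GGUF", "Qwen/Qwen3-8B"], probed in order.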

    def _list_gguf_files(self, repo_id: str) -> List[str]:
        """List .gguf files in a HuggingFace repo.

        Returns an empty list if the repo does not exist or has no GGUF files.
        """
        try:
            api = self._get_hf_api()
            all_files = api.list_repo_files(repo_id)
            return [f for f in all_files if f.lower().endswith('.gguf')]
        except ImportError:
            raise
        except Exception as e:
            # Repo not found (404), rate limited, network error, etc.
            logger.debug(f"Could not list files in {repo_id}: {e}")
            return []

    # ── Quantization selection ───────────────────────────────────

    def select_quantization(self, repo_id: str, quant: str = 'auto') -> str:
        """Select the best GGUF file from a repo.

        If quant='auto', selects based on available VRAM:
            >= 24GB free: Q8_0
            >= 16GB free: Q6_K
            >= 8GB free:  Q4_K_M
            >= 4GB free:  Q4_K_S
            CPU only:     Q4_0

        If a specific quant is requested (e.g. 'Q4_K_M') but not available,
        falls back to the closest available quant by preference rank.

        Args:
            repo_id: HuggingFace repo containing GGUF files.
            quant: 'auto' or a specific quant label.

        Returns:
            Filename of the selected GGUF file.

        Raises:
            FileNotFoundError: If no suitable GGUF file is found.
        """
        gguf_files = self._list_gguf_files(repo_id)
        if not gguf_files:
            raise FileNotFoundError(
                f"No GGUF files found in {repo_id}"
            )

        # Build a map of quant_label -> filename
        quant_map: Dict[str, str] = {}
        for fname in gguf_files:
            label = _extract_quant(fname)
            if label:
                # If multiple files share a quant label, keep the shorter
                # filename (heuristic: a single file beats split shards,
                # whose names typically carry "-00001-of-00002" suffixes)
                if label not in quant_map or len(fname) < len(quant_map[label]):
                    quant_map[label] = fname

        if not quant_map:
            # No recognizable quant labels — return the first GGUF file
            logger.warning(
                f"No quant labels recognized in {repo_id}; "
                f"returning first GGUF file: {gguf_files[0]}"
            )
            return gguf_files[0]

        # Determine target quant
        if quant == 'auto':
            target = self._auto_select_quant()
            logger.info(f"Auto-selected target quant: {target}")
        else:
            target = quant.upper()

        # Exact match
        if target in quant_map:
            return quant_map[target]

        # Otherwise pick the available quant whose QUANT_PREFERENCE rank is
        # closest to the target's. Ties go to the higher-quality side:
        # `available` is sorted best-first and we only update on a strictly
        # smaller distance.
        target_rank = _quant_rank(target)
        available = sorted(quant_map.keys(), key=_quant_rank)

        best_file = None
        best_distance = float('inf')
        for q in available:
            distance = abs(_quant_rank(q) - target_rank)
            if distance < best_distance:
                best_distance = distance
                best_file = quant_map[q]
                if distance == 0:
                    break  # exact match

        if best_file is None:
            # Should not happen (quant_map is non-empty) but be safe
            best_file = next(iter(quant_map.values()))

        logger.info(
            f"Requested {target}, best available: "
            f"{_extract_quant(best_file)} -> {best_file}"
        )
        return best_file
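
    # Fallback example: with target Q6_K (rank 2) and only Q8_0 (rank 0)
    # and Q5_K_M (rank 3) in the repo, the distances are 2 and 1, so
    # Q5_K_M is chosen. If Q5_K_M were absent, Q8_0 (distance 2) would
    # beat Q4_K_M (rank 6, distance 4).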

    def _auto_select_quant(self) -> str:
        """Pick a quant target based on current free VRAM."""
        gpu_info = self._get_gpu_info()
        free_gb = gpu_info.get('free_gb', 0.0)

        if not gpu_info.get('cuda_available', False):
            logger.info("No GPU detected, targeting CPU-friendly Q4_0")
            return 'Q4_0'

        for threshold, quant in VRAM_QUANT_MAP:
            if free_gb >= threshold:
                logger.info(
                    f"Free VRAM: {free_gb:.1f} GB >= {threshold} GB, "
                    f"targeting {quant}"
                )
                return quant

        # Fallback (should not reach here since 0.0 is in the map)
        return 'Q4_0'
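
    # Worked example: with 10.2 GB free the walk is 10.2 >= 24.0? no,
    # >= 16.0? no, >= 8.0? yes -> Q4_K_M.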

    @staticmethod
    def _validate_gguf(path: Path) -> bool:
        """Check that the file starts with the GGUF magic bytes b'GGUF'
        (0x47 0x47 0x55 0x46)."""
        try:
            with open(path, 'rb') as f:
                magic = f.read(4)
            return magic == b'GGUF'
        except Exception:
            return False
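
    # The GGUF header continues past the magic with a little-endian uint32
    # format version. A stricter validator could check that too (a sketch,
    # not used above; the accepted version set is an assumption):
    #
    #     import struct
    #     with open(path, 'rb') as f:
    #         magic = f.read(4)
    #         version = struct.unpack('<I', f.read(4))[0]
    #     ok = magic == b'GGUF' and version in (1, 2, 3)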

    # ── Download ─────────────────────────────────────────────────

    def download(self, repo_id: str, filename: str) -> Path:
        """Download a GGUF file from HuggingFace.

        Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/{filename}.
        Thread-safe: only one download runs at a time.
        Skips the download if the file already exists, is non-empty, and
        passes the GGUF magic-bytes check.
        Updates the ModelStorageManager manifest on success.

        Args:
            repo_id: HuggingFace repo, e.g. "unsloth/Qwen3-8B-GGUF".
            filename: GGUF filename within the repo.

        Returns:
            Path to the local GGUF file.

        Raises:
            RuntimeError: If the download fails.
            ImportError: If huggingface_hub is not installed.
        """
        # Build local path
        repo_safe = repo_id.replace('/', '--')
        gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf' / repo_safe
        local_path = gguf_dir / filename

        # Skip if already downloaded AND valid GGUF (magic bytes check)
        if local_path.exists() and local_path.stat().st_size > 0:
            if self._validate_gguf(local_path):
                logger.info(f"Already downloaded: {local_path}")
                return local_path
            else:
                logger.warning(
                    f"Corrupt/partial GGUF detected, re-downloading: {local_path}"
                )
                local_path.unlink(missing_ok=True)

        with self._download_lock:
            # Double-check after acquiring the lock
            if local_path.exists() and local_path.stat().st_size > 0:
                if self._validate_gguf(local_path):
                    logger.info(f"Already downloaded (post-lock): {local_path}")
                    return local_path
                else:
                    local_path.unlink(missing_ok=True)

            gguf_dir.mkdir(parents=True, exist_ok=True)

            logger.info(
                f"Downloading {filename} from {repo_id} "
                f"to {gguf_dir}..."
            )

            try:
                from huggingface_hub import hf_hub_download

                downloaded_path = hf_hub_download(
                    repo_id=repo_id,
                    filename=filename,
                    local_dir=str(gguf_dir),
                    # Deprecated and ignored by recent huggingface_hub
                    # releases (which always write real files into
                    # local_dir); kept for older versions.
                    local_dir_use_symlinks=False,
                )
                downloaded_path = Path(downloaded_path)

                # hf_hub_download may place the file in a subfolder or
                # directly in local_dir — ensure we return the right path.
                if downloaded_path.exists():
                    actual_path = downloaded_path
                elif local_path.exists():
                    actual_path = local_path
                else:
                    raise RuntimeError(
                        f"Download completed but file not found at "
                        f"{downloaded_path} or {local_path}"
                    )

                size_bytes = actual_path.stat().st_size
                size_gb = size_bytes / (1024 ** 3)
                logger.info(
                    f"Download complete: {actual_path.name} "
                    f"({size_gb:.2f} GB)"
                )

                # Update manifest
                try:
                    storage = self._get_storage()
                    tool_name = f"gguf/{repo_safe}"
                    storage.mark_downloaded(
                        tool_name,
                        source_url=f"hf://{repo_id}/{filename}",
                        size_bytes=size_bytes,
                    )
                except Exception as e:
                    logger.warning(f"Manifest update failed: {e}")

                return actual_path

            except ImportError:
                raise ImportError(
                    "huggingface_hub is required for GGUF downloads. "
                    "Install it with: pip install huggingface_hub"
                )
            except Exception as e:
                logger.error(f"Download failed for {repo_id}/{filename}: {e}")
                raise RuntimeError(
                    f"Failed to download {filename} from {repo_id}: {e}"
                ) from e
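
    # Resulting layout: for repo_id="unsloth/Qwen3-8B-GGUF" and a
    # hypothetical filename "Qwen3-8B-Q4_K_M.gguf", the file lands at
    #   ~/.hevolve/models/gguf/unsloth--Qwen3-8B-GGUF/Qwen3-8B-Q4_K_M.gguf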

    # ── Listing ──────────────────────────────────────────────────

    def list_available(self, model_name: str) -> List[Dict]:
        """List all available GGUF files for a model.

        Searches all candidate repos (Unsloth, original, bartowski) and
        returns a consolidated list of available files.

        Args:
            model_name: e.g. "Qwen/Qwen3-8B"

        Returns:
            List of dicts with keys:
                - repo_id: str
                - filename: str
                - quant: str or None
                - quant_rank: int (lower = better quality)
                - size_bytes: int or None (if available from the API)
        """
        if '/' in model_name:
            org, basename = model_name.split('/', 1)
        else:
            org = None
            basename = model_name

        candidates = [f"unsloth/{basename}-GGUF"]
        if org:
            candidates.append(f"{org}/{basename}-GGUF")
        candidates.append(f"bartowski/{basename}-GGUF")
        if org:
            candidates.append(f"{org}/{basename}")

        results: List[Dict] = []
        seen_files = set()

        for repo_id in candidates:
            try:
                api = self._get_hf_api()
                repo_tree = api.list_repo_tree(repo_id)
                for item in repo_tree:
                    # list_repo_tree yields RepoFile/RepoFolder objects,
                    # which expose .path; sibling-style objects use .rfilename.
                    fname = getattr(item, 'rfilename', None)
                    if fname is None:
                        fname = getattr(item, 'path', None)
                    if not fname or not fname.lower().endswith('.gguf'):
                        continue
                    # Deduplicate by repo-qualified filename
                    key = f"{repo_id}/{fname}"
                    if key in seen_files:
                        continue
                    seen_files.add(key)

                    quant = _extract_quant(fname)
                    size = getattr(item, 'size', None)

                    results.append({
                        'repo_id': repo_id,
                        'filename': fname,
                        'quant': quant,
                        'quant_rank': _quant_rank(quant) if quant else 999,
                        'size_bytes': size,
                    })
            except ImportError:
                raise
            except Exception as e:
                logger.debug(f"Could not list {repo_id}: {e}")

        # Sort by quant quality (best first)
        results.sort(key=lambda r: r['quant_rank'])
        return results
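
    # A returned entry looks like this (values illustrative, not real sizes):
    #   {'repo_id': 'unsloth/Qwen3-8B-GGUF',
    #    'filename': 'Qwen3-8B-Q4_K_M.gguf',
    #    'quant': 'Q4_K_M', 'quant_rank': 6, 'size_bytes': 5000000000}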


# ── Singleton ────────────────────────────────────────────────────────

_resolver: Optional[HFModelResolver] = None
_resolver_lock = threading.Lock()


def get_resolver() -> HFModelResolver:
    """Get or create the global HFModelResolver singleton."""
    global _resolver
    if _resolver is None:
        with _resolver_lock:
            if _resolver is None:
                _resolver = HFModelResolver()
    return _resolver
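

# Minimal smoke test (a sketch: needs network access and huggingface_hub,
# and reuses the example model name from the module docstring):
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    resolver = get_resolver()
    for entry in resolver.list_available("Qwen/Qwen3-8B")[:5]:
        print(entry['repo_id'], entry['filename'], entry['quant'])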