Coverage for integrations / service_tools / hf_model_resolver.py: 0.0%

197 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2HF Model Resolver — find and download the best GGUF quantization from HuggingFace. 

3 

4Prefers Unsloth quantizations (fastest fine-tuning tool, best GGUF exports). 

5Auto-selects quantization level based on available VRAM. 

6 

7Usage: 

8 resolver = HFModelResolver() 

9 path = resolver.resolve("Qwen/Qwen3-8B") # Returns local GGUF path 

10 # Internally: finds unsloth/Qwen3-8B-GGUF, picks Q4_K_M for 8GB GPU, downloads 

11""" 

12 

13import logging 

14import re 

15import threading 

16from pathlib import Path 

17from typing import Dict, List, Optional, Tuple 

18 

19logger = logging.getLogger(__name__) 

20 

21# ── Quantization constants ─────────────────────────────────────────── 

22 

23# Quantization preference order (best quality first) 

24QUANT_PREFERENCE = [ 

25 'Q8_0', 'Q6_K_L', 'Q6_K', 'Q5_K_M', 'Q5_K_S', 

26 'Q4_K_L', 'Q4_K_M', 'Q4_K_S', 'IQ4_XS', 'Q4_0', 

27 'IQ3_M', 'IQ2_M', 'Q2_K', 

28] 

29 

30# VRAM thresholds for auto-selection: (min_free_vram_gb, target_quant) 

31VRAM_QUANT_MAP: List[Tuple[float, str]] = [ 

32 (24.0, 'Q8_0'), 

33 (16.0, 'Q6_K'), 

34 (8.0, 'Q4_K_M'), 

35 (4.0, 'Q4_K_S'), 

36 (0.0, 'Q4_0'), 

37] 

38 

39# Regex to extract quant label from GGUF filenames. 

40# Matches patterns like: Q4_K_M, Q8_0, IQ4_XS, Q6_K_L, Q2_K, etc. 

41_QUANT_RE = re.compile( 

42 r'(?:^|[._-])' 

43 r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)' 

44 r'(?:[._-]|$)', 

45 re.IGNORECASE, 

46) 

47 

48 

49def _extract_quant(filename: str) -> Optional[str]: 

50 """Extract quantization label from a GGUF filename. 

51 

52 Returns the quant string in upper-case (e.g. 'Q4_K_M') or None. 

53 """ 

54 m = _QUANT_RE.search(filename) 

55 if m: 

56 return m.group(1).upper() 

57 return None 

58 

59 

60def _quant_rank(quant: str) -> int: 

61 """Return rank of a quant in QUANT_PREFERENCE (lower = better quality). 

62 

63 Unknown quants get a high rank so known ones are preferred. 

64 """ 

65 try: 

66 return QUANT_PREFERENCE.index(quant) 

67 except ValueError: 

68 return len(QUANT_PREFERENCE) + 1 

69 

70 

class HFModelResolver:
    """Resolve HuggingFace model names to local GGUF file paths.

    Search strategy:
        1. Unsloth GGUF repos (preferred — best GGUF exports)
        2. Original org GGUF repos
        3. bartowski GGUF repos (popular community uploader)
        4. Original repo (may contain GGUF files directly)

    Auto-selects quantization based on available VRAM via vram_manager.
    Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/.
    """

    def __init__(self):
        # Serializes downloads so only one large transfer runs at a time.
        self._download_lock = threading.Lock()
        self._storage = None  # lazy ModelStorageManager
        self._hf_api = None  # lazy HfApi

    # ── Lazy accessors ─────────────────────────────────────────

    def _get_storage(self):
        """Lazy-load ModelStorageManager to avoid import cycles."""
        if self._storage is None:
            from .model_storage import ModelStorageManager
            self._storage = ModelStorageManager()
        return self._storage

    def _get_hf_api(self):
        """Lazy-load HfApi. Raises ImportError if huggingface_hub missing."""
        if self._hf_api is None:
            from huggingface_hub import HfApi
            self._hf_api = HfApi()
        return self._hf_api

    def _get_gpu_info(self) -> Dict:
        """Get GPU info from vram_manager singleton.

        Returns:
            Dict with 'cuda_available', 'total_gb', 'free_gb' and 'name'
            keys; a CPU-only stub when detection is unavailable.
        """
        try:
            from .vram_manager import vram_manager
            return vram_manager.detect_gpu()
        except Exception as e:
            logger.debug(f"GPU detection unavailable: {e}")
            return {
                'cuda_available': False,
                'total_gb': 0.0,
                'free_gb': 0.0,
                'name': None,
            }

    # ── Candidate repos ────────────────────────────────────────

    @staticmethod
    def _candidate_repos(model_name: str) -> List[str]:
        """Build the ordered list of candidate GGUF repos for a model.

        Order: unsloth GGUF, original-org GGUF, bartowski GGUF, then the
        original repo itself (it may ship .gguf files directly).

        Shared by find_gguf_repo() and list_available() so both search
        the same repos in the same order (previously list_available
        omitted the bare-basename fallback for org-less names).
        """
        if '/' in model_name:
            org, basename = model_name.split('/', 1)
        else:
            org = None
            basename = model_name

        candidates = [f"unsloth/{basename}-GGUF"]
        if org:
            candidates.append(f"{org}/{basename}-GGUF")
        candidates.append(f"bartowski/{basename}-GGUF")
        # Original repo as last resort
        candidates.append(f"{org}/{basename}" if org else basename)
        return candidates

    # ── Main entry point ───────────────────────────────────────

    def resolve(self, model_name: str, quant: str = 'auto') -> Path:
        """Resolve a HF model name to a local GGUF file path.

        Args:
            model_name: HuggingFace model identifier, e.g. "Qwen/Qwen3-8B"
                or "meta-llama/Llama-3.1-8B".
            quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'
                to pick based on available VRAM.

        Returns:
            Path to the downloaded GGUF file on disk.

        Raises:
            FileNotFoundError: If no GGUF repo could be found.
            RuntimeError: If download fails.
            ImportError: If huggingface_hub is not installed.
        """
        logger.info(f"Resolving GGUF for {model_name} (quant={quant})")

        # Step 1: find a repo that has GGUF files
        repo_id = self.find_gguf_repo(model_name)
        logger.info(f"Found GGUF repo: {repo_id}")

        # Step 2: pick quantization
        filename = self.select_quantization(repo_id, quant)
        # Fix: interpolate the selected filename (was a literal "(unknown)").
        logger.info(f"Selected quantization file: {filename}")

        # Step 3: download if needed
        local_path = self.download(repo_id, filename)
        logger.info(f"GGUF ready at: {local_path}")

        return local_path

    # ── Repo discovery ─────────────────────────────────────────

    def find_gguf_repo(self, model_name: str) -> str:
        """Search for a GGUF repo for the given model.

        Search order (prefers Unsloth):
            1. unsloth/{basename}-GGUF
            2. {org}/{model}-GGUF
            3. bartowski/{basename}-GGUF
            4. {org}/{model} (original repo, check for .gguf files)

        Args:
            model_name: e.g. "Qwen/Qwen3-8B" or "meta-llama/Llama-3.1-8B"

        Returns:
            The repo_id string (e.g. "unsloth/Qwen3-8B-GGUF").

        Raises:
            FileNotFoundError: If no repo with GGUF files is found.
            ImportError: If huggingface_hub is not installed.
        """
        candidates = self._candidate_repos(model_name)

        for repo_id in candidates:
            gguf_files = self._list_gguf_files(repo_id)
            if gguf_files:
                logger.info(
                    f"Found {len(gguf_files)} GGUF file(s) in {repo_id}"
                )
                return repo_id
            logger.debug(f"No GGUF files in {repo_id}")

        raise FileNotFoundError(
            f"No GGUF repository found for '{model_name}'. "
            f"Searched: {', '.join(candidates)}"
        )

    def _list_gguf_files(self, repo_id: str) -> List[str]:
        """List .gguf files in a HuggingFace repo.

        Returns an empty list if the repo does not exist or has no GGUF
        files. ImportError (huggingface_hub missing) is re-raised so the
        caller gets an actionable error rather than a silent miss.
        """
        try:
            api = self._get_hf_api()
            all_files = api.list_repo_files(repo_id)
            return [f for f in all_files if f.lower().endswith('.gguf')]
        except ImportError:
            raise
        except Exception as e:
            # Repo not found (404), rate limited, network error, etc.
            logger.debug(f"Could not list files in {repo_id}: {e}")
            return []

    # ── Quantization selection ─────────────────────────────────

    def select_quantization(self, repo_id: str, quant: str = 'auto') -> str:
        """Select the best GGUF file from a repo.

        If quant='auto', selects based on available VRAM:
            >= 24GB free: Q8_0
            >= 16GB free: Q6_K
            >= 8GB free: Q4_K_M
            >= 4GB free: Q4_K_S
            CPU only: Q4_0

        If a specific quant is requested (e.g. 'Q4_K_M'), finds the closest
        available file.

        Args:
            repo_id: HuggingFace repo containing GGUF files.
            quant: 'auto' or a specific quant label.

        Returns:
            Filename of the selected GGUF file.

        Raises:
            FileNotFoundError: If no suitable GGUF file is found.
        """
        gguf_files = self._list_gguf_files(repo_id)
        if not gguf_files:
            raise FileNotFoundError(
                f"No GGUF files found in {repo_id}"
            )

        # Build a map of quant_label -> filename
        quant_map: Dict[str, str] = {}
        for fname in gguf_files:
            label = _extract_quant(fname)
            if label:
                # If multiple files have the same quant, prefer the shorter
                # filename (single-file over split shards).
                if label not in quant_map or len(fname) < len(quant_map[label]):
                    quant_map[label] = fname

        if not quant_map:
            # No recognizable quant labels — return the first GGUF file
            logger.warning(
                f"No quant labels recognized in {repo_id}; "
                f"returning first GGUF file: {gguf_files[0]}"
            )
            return gguf_files[0]

        # Determine target quant
        if quant == 'auto':
            target = self._auto_select_quant()
            logger.info(f"Auto-selected target quant: {target}")
        else:
            target = quant.upper()

        # Exact match
        if target in quant_map:
            return quant_map[target]

        # No exact match: pick the available quant whose rank in
        # QUANT_PREFERENCE is closest to the target's rank.
        target_rank = _quant_rank(target)
        available = sorted(quant_map.keys(), key=_quant_rank)

        best_file = None
        best_distance = float('inf')
        for q in available:
            distance = abs(_quant_rank(q) - target_rank)
            if distance < best_distance:
                best_distance = distance
                best_file = quant_map[q]
                if distance == 0:
                    break  # exact match

        if best_file is None:
            # Should not happen (quant_map is non-empty) but be safe
            best_file = next(iter(quant_map.values()))

        logger.info(
            f"Requested {target}, best available: "
            f"{_extract_quant(best_file)} -> {best_file}"
        )
        return best_file

    def _auto_select_quant(self) -> str:
        """Pick a quant target based on current free VRAM."""
        gpu_info = self._get_gpu_info()
        free_gb = gpu_info.get('free_gb', 0.0)

        if not gpu_info.get('cuda_available', False):
            logger.info("No GPU detected, targeting CPU-friendly Q4_0")
            return 'Q4_0'

        # VRAM_QUANT_MAP is ordered high-to-low, so the first threshold
        # we clear is the best quant we can afford.
        for threshold, quant in VRAM_QUANT_MAP:
            if free_gb >= threshold:
                logger.info(
                    f"Free VRAM: {free_gb:.1f} GB >= {threshold} GB, "
                    f"targeting {quant}"
                )
                return quant

        # Fallback (should not reach here since 0.0 is in the map)
        return 'Q4_0'

    @staticmethod
    def _validate_gguf(path: Path) -> bool:
        """Check GGUF magic bytes (b'GGUF') at file start.

        Returns False for unreadable, empty, or non-GGUF files.
        """
        try:
            with open(path, 'rb') as f:
                magic = f.read(4)
            return magic == b'GGUF'
        except Exception:
            return False

    # ── Download ───────────────────────────────────────────────

    def download(self, repo_id: str, filename: str) -> Path:
        """Download a GGUF file from HuggingFace.

        Downloads to ~/.hevolve/models/gguf/{repo_safe_name}/{filename}.
        Thread-safe: only one download runs at a time.
        Skips download if the file already exists, has non-zero size,
        and passes the GGUF magic-bytes check; corrupt/partial files
        are deleted and re-downloaded.
        Updates the ModelStorageManager manifest on success (best-effort).

        Args:
            repo_id: HuggingFace repo, e.g. "unsloth/Qwen3-8B-GGUF".
            filename: GGUF filename within the repo.

        Returns:
            Path to the local GGUF file.

        Raises:
            RuntimeError: If the download fails.
            ImportError: If huggingface_hub is not installed.
        """
        # Build local path
        repo_safe = repo_id.replace('/', '--')
        gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf' / repo_safe
        local_path = gguf_dir / filename

        # Skip if already downloaded AND valid GGUF (magic bytes check)
        if local_path.exists() and local_path.stat().st_size > 0:
            if self._validate_gguf(local_path):
                logger.info(f"Already downloaded: {local_path}")
                return local_path
            else:
                logger.warning(f"Corrupt/partial GGUF detected, re-downloading: {local_path}")
                local_path.unlink(missing_ok=True)

        with self._download_lock:
            # Double-check after acquiring lock: another thread may have
            # completed this exact download while we waited.
            if local_path.exists() and local_path.stat().st_size > 0:
                if self._validate_gguf(local_path):
                    logger.info(f"Already downloaded (post-lock): {local_path}")
                    return local_path
                else:
                    local_path.unlink(missing_ok=True)

            gguf_dir.mkdir(parents=True, exist_ok=True)

            # Fix: interpolate the filename (was a literal "(unknown)").
            logger.info(
                f"Downloading {filename} from {repo_id} "
                f"to {gguf_dir}..."
            )

            try:
                from huggingface_hub import hf_hub_download

                downloaded_path = hf_hub_download(
                    repo_id=repo_id,
                    filename=filename,
                    local_dir=str(gguf_dir),
                    # NOTE(review): deprecated and ignored in
                    # huggingface_hub >= 0.23; kept for older versions.
                    local_dir_use_symlinks=False,
                )
                downloaded_path = Path(downloaded_path)

                # hf_hub_download may place the file in a subfolder or
                # directly in local_dir — ensure we return the right path.
                if downloaded_path.exists():
                    actual_path = downloaded_path
                elif local_path.exists():
                    actual_path = local_path
                else:
                    raise RuntimeError(
                        f"Download completed but file not found at "
                        f"{downloaded_path} or {local_path}"
                    )

                size_bytes = actual_path.stat().st_size
                size_gb = size_bytes / (1024 ** 3)
                logger.info(
                    f"Download complete: {actual_path.name} "
                    f"({size_gb:.2f} GB)"
                )

                # Update manifest (best-effort; failure is non-fatal)
                try:
                    storage = self._get_storage()
                    tool_name = f"gguf/{repo_safe}"
                    storage.mark_downloaded(
                        tool_name,
                        # Fix: record the real filename in the manifest
                        # (was a literal "(unknown)").
                        source_url=f"hf://{repo_id}/{filename}",
                        size_bytes=size_bytes,
                    )
                except Exception as e:
                    logger.warning(f"Manifest update failed: {e}")

                return actual_path

            except ImportError:
                raise ImportError(
                    "huggingface_hub is required for GGUF downloads. "
                    "Install it with: pip install huggingface_hub"
                )
            except Exception as e:
                # Fix: interpolate the filename (was a literal "(unknown)").
                logger.error(f"Download failed for {repo_id}/{filename}: {e}")
                raise RuntimeError(
                    f"Failed to download {filename} from {repo_id}: {e}"
                ) from e

    # ── Listing ────────────────────────────────────────────────

    def list_available(self, model_name: str) -> List[Dict]:
        """List all available GGUF files for a model.

        Searches all candidate repos (Unsloth, original, bartowski) and
        returns a consolidated list of available files.

        Args:
            model_name: e.g. "Qwen/Qwen3-8B"

        Returns:
            List of dicts with keys:
                - repo_id: str
                - filename: str
                - quant: str or None
                - quant_rank: int (lower = better quality)
                - size_bytes: int or None (if available from API)
        """
        candidates = self._candidate_repos(model_name)

        results: List[Dict] = []
        seen_files = set()

        for repo_id in candidates:
            try:
                api = self._get_hf_api()
                repo_info = api.list_repo_tree(repo_id)
                for item in repo_info:
                    # item is a RepoFile or RepoFolder
                    fname = getattr(item, 'rfilename', None)
                    if fname is None:
                        # Might be a RepoFolder or different API object
                        fname = getattr(item, 'path', None)
                    if not fname or not fname.lower().endswith('.gguf'):
                        continue
                    # Deduplicate by repo-qualified filename
                    key = f"{repo_id}/{fname}"
                    if key in seen_files:
                        continue
                    seen_files.add(key)

                    quant = _extract_quant(fname)
                    size = getattr(item, 'size', None)

                    results.append({
                        'repo_id': repo_id,
                        'filename': fname,
                        'quant': quant,
                        'quant_rank': _quant_rank(quant) if quant else 999,
                        'size_bytes': size,
                    })
            except ImportError:
                raise
            except Exception as e:
                logger.debug(f"Could not list {repo_id}: {e}")

        # Sort by quant quality (best first)
        results.sort(key=lambda r: r['quant_rank'])
        return results

517 

518 

519# ── Singleton ──────────────────────────────────────────────────────── 

520 

_resolver: Optional[HFModelResolver] = None
_resolver_lock = threading.Lock()


def get_resolver() -> HFModelResolver:
    """Return the process-wide HFModelResolver, creating it on first use."""
    global _resolver
    # Fast path: already created, no locking needed.
    if _resolver is not None:
        return _resolver
    with _resolver_lock:
        # Re-check under the lock: another thread may have won the race.
        if _resolver is None:
            _resolver = HFModelResolver()
    return _resolver
532 return _resolver