# integrations/service_tools/model_onboarding.py

1""" 

2Model Onboarding — one-command flow to go from model name to running inference. 

3 

4 onboard("Qwen/Qwen3-8B") 

5 # 1. Finds unsloth/Qwen3-8B-GGUF on HuggingFace 

6 # 2. Picks Q4_K_M quantization for user's GPU 

7 # 3. Downloads GGUF to ~/.hevolve/models/ 

8 # 4. Ensures llama.cpp binary is available 

9 # 5. Starts llama-server with optimal params 

10 # 6. Registers model in catalog + registry 

11 # 7. Returns endpoint URL ready for inference 

12 

13Also provides CLI/API for listing available models, switching active model, 

14and removing downloaded models. 

15""" 

import logging
import re
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)


# ── Nunba companion detection ──────────────────────────────────────

def _is_nunba_bundled() -> bool:
    """Detect if Nunba (the companion desktop app) is managing llama.cpp.

    When HARTOS is pip-installed inside Nunba, `hartos_backend_adapter` is
    in sys.modules. Nunba owns the llama.cpp lifecycle, model downloads,
    config.json, and port 8080. We should not duplicate that work.
    """
    import sys
    return 'hartos_backend_adapter' in sys.modules


def _onboard_via_nunba(model_name: str, quant: str, port: int) -> dict:
    """Onboard a model by delegating to Nunba's existing infrastructure.

    Nunba already has llama.cpp running on port 8080. We just need to
    tell it to load a different model via its adapter, or check whether
    the requested model is already active.
    """
    import urllib.error
    import urllib.request

    # Check if Nunba's llama.cpp is already running
    llm_port = 8080
    try:
        url = f'http://127.0.0.1:{llm_port}/health'
        with urllib.request.urlopen(url, timeout=3) as resp:
            if resp.status == 200:
                logger.info("Nunba's llama.cpp already running on port %d", llm_port)
                return {
                    'status': 'ready',
                    'model': model_name,
                    'quant': quant,
                    'endpoint': f'http://127.0.0.1:{llm_port}',
                    'source': 'nunba',
                    'note': 'Nunba manages the llama.cpp server. '
                            'Use Nunba settings to change models.',
                }
    except (urllib.error.URLError, OSError):
        pass

    # Nunba not running yet — tell the user
    return {
        'status': 'waiting',
        'model': model_name,
        'endpoint': f'http://127.0.0.1:{llm_port}',
        'source': 'nunba',
        'note': 'HARTOS is bundled with Nunba. Start the Nunba desktop app '
                'to activate llama.cpp inference. Nunba manages model lifecycle.',
    }
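
# Note: Nunba pins llama.cpp to port 8080, so _onboard_via_nunba ignores the
# caller's `port` argument by design (see the docstring above).
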

# ── Module-level state ──────────────────────────────────────────────

_onboard_lock = threading.Lock()
_active_model: Optional[Dict] = None  # tracks the currently running model


# ── Lazy imports (all behind try/except) ────────────────────────────

def _get_resolver():
    """Lazy-load HFModelResolver singleton."""
    try:
        from integrations.service_tools.hf_model_resolver import get_resolver
        return get_resolver()
    except ImportError:
        logger.warning("hf_model_resolver not available")
        return None


def _get_llamacpp_manager():
    """Lazy-load llamacpp_manager singleton."""
    try:
        from integrations.service_tools.llamacpp_manager import get_llamacpp_manager
        return get_llamacpp_manager()
    except ImportError:
        logger.warning("llamacpp_manager not available")
        return None


def _get_catalog():
    """Lazy-load ModelCatalog singleton."""
    try:
        from integrations.service_tools.model_catalog import get_catalog
        return get_catalog()
    except ImportError:
        logger.warning("model_catalog not available")
        return None


def _get_model_registry():
    """Lazy-load ModelRegistry singleton."""
    try:
        from integrations.agent_engine.model_registry import model_registry
        return model_registry
    except ImportError:
        logger.warning("model_registry not available")
        return None


def _get_vram_manager():
    """Lazy-load VRAMManager singleton."""
    try:
        from integrations.service_tools.vram_manager import vram_manager
        return vram_manager
    except ImportError:
        logger.warning("vram_manager not available")
        return None


def _get_default_port() -> int:
    """Get the llama.cpp port from port_registry, default 8080."""
    try:
        from core.port_registry import get_port
        return get_port('llm')
    except Exception:
        return 8080


def _make_catalog_id(model_name: str, quant: str) -> str:
    """Create a stable catalog ID from model name and quantization.

    E.g. "Qwen/Qwen3-8B" + "Q4_K_M" -> "llm-qwen3-8b-q4-k-m"
    """
    # Take basename from repo-style names
    if '/' in model_name:
        basename = model_name.split('/')[-1]
    else:
        basename = model_name
    slug = re.sub(r'[^a-z0-9]+', '-', basename.lower()).strip('-')
    quant_slug = re.sub(r'[^a-z0-9]+', '-', quant.lower()).strip('-')
    return f"llm-{slug}-{quant_slug}"
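
# Note: only the repo basename feeds the slug, so two repos that share a
# basename (e.g. hypothetical 'org-a/Foo-7B' and 'org-b/Foo-7B') collapse to
# the same catalog ID for a given quant.
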

def _extract_quant_from_path(gguf_path: Path) -> str:
    """Extract quantization label from a GGUF filename."""
    try:
        from integrations.service_tools.hf_model_resolver import _extract_quant
        q = _extract_quant(gguf_path.name)
        if q:
            return q
    except ImportError:
        pass
    # Fallback regex
    m = re.search(r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)', gguf_path.name, re.IGNORECASE)
    return m.group(1).upper() if m else 'unknown'
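
# Illustrative fallback parses (hypothetical filenames; the primary path is
# hf_model_resolver._extract_quant when it is importable):
#     _extract_quant_from_path(Path('Qwen3-8B-Q4_K_M.gguf'))  -> 'Q4_K_M'
#     _extract_quant_from_path(Path('model.IQ2_XS.gguf'))     -> 'IQ2_XS'
#     _extract_quant_from_path(Path('notes.gguf'))            -> 'unknown'
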

# ── Core functions ──────────────────────────────────────────────────

def onboard(model_name: str, quant: str = 'auto', port: int = 0) -> Dict:
    """Full onboarding pipeline: resolve, download, start, register.

    Args:
        model_name: HuggingFace model identifier (e.g. "Qwen/Qwen3-8B").
        quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'.
        port: Port for llama-server. 0 = use the port registry default.

    Returns:
        Status dict with keys: status, model, quant, endpoint, gguf_path.
        On error: status='error', error=<message>.
    """
    global _active_model

    if port == 0:
        port = _get_default_port()

    # ── Nunba companion detection ──
    # When Nunba (sibling repo) is installed, it owns the llama.cpp lifecycle
    # and model management. We defer to it instead of duplicating.
    if _is_nunba_bundled():
        return _onboard_via_nunba(model_name, quant, port)

    with _onboard_lock:
        try:
            # Step 1: Resolve and download GGUF
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available. '
                             'Install huggingface_hub: pip install huggingface_hub',
                }

            logger.info(f"Onboarding {model_name} (quant={quant}, port={port})")
            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Step 2: Ensure llama.cpp binary is available
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            server_bin = lcpp.get_server_binary()
            if server_bin is None:
                logger.info("llama-server binary not found, downloading...")
                lcpp.download_server()
                server_bin = lcpp.get_server_binary()
                if server_bin is None:
                    return {
                        'status': 'error',
                        'error': 'Failed to obtain llama-server binary',
                    }

            # Step 3: Start llama-server
            logger.info(f"Starting llama-server on port {port}...")
            if not lcpp.start(str(gguf_path), port):
                return {
                    'status': 'error',
                    'error': f'llama-server failed to start on port {port}. '
                             'Check if the port is in use or the GGUF file is valid.',
                }

            # Step 4: Register in catalog
            catalog_id = _make_catalog_id(model_name, quant_used)
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)

            # Step 5: Register in model registry
            _register_in_registry(catalog_id, model_name, port)

            # Step 6: Track active model
            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Onboarding complete: {model_name} ({quant_used}) at {endpoint}")
            return result

        except FileNotFoundError as e:
            logger.error(f"Onboard failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except ImportError as e:
            logger.error(f"Onboard failed — missing dependency: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Onboard failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}
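
# Illustrative call (assumes huggingface_hub is installed and a llama.cpp
# build can be obtained for this machine):
#     result = onboard('Qwen/Qwen3-8B')        # quant='auto', registry port
#     if result['status'] == 'ready':
#         print(result['endpoint'])            # e.g. 'http://127.0.0.1:8080'
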

def switch_model(model_name: str, quant: str = 'auto') -> Dict:
    """Hot-swap the active model without full restart.

    Resolves and downloads if needed, then calls llamacpp_manager.swap_model().
    Updates catalog and registry entries.

    Args:
        model_name: HuggingFace model identifier.
        quant: Quantization level or 'auto'.

    Returns:
        Status dict. On error: status='error', error=<message>.
    """
    global _active_model

    with _onboard_lock:
        try:
            # Resolve + download
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available',
                }

            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Swap model in running server
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            logger.info(f"Swapping to {model_name} ({quant_used})...")
            lcpp.swap_model(str(gguf_path))

            # Unmark previous active model in catalog
            if _active_model:
                catalog = _get_catalog()
                if catalog:
                    catalog.mark_unloaded(_active_model.get('catalog_id', ''))

            # Register new model
            catalog_id = _make_catalog_id(model_name, quant_used)
            port = (_active_model or {}).get('port', _get_default_port())
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)
            _register_in_registry(catalog_id, model_name, port)

            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Model swap complete: {model_name} ({quant_used})")
            return result

        except FileNotFoundError as e:
            logger.error(f"Switch failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Switch failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}
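
# Illustrative swap (the endpoint and port stay the same; only the model
# behind the running llama-server changes):
#     switch_model('meta-llama/Llama-3.1-8B', quant='Q4_K_M')
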

def list_downloaded() -> List[Dict]:
    """List all downloaded GGUF models with their sizes, quant types, and paths.

    Returns:
        List of dicts with keys: filename, quant, size_bytes, size_gb, path, repo.
    """
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if not gguf_dir.exists():
        return []

    results = []
    for repo_dir in gguf_dir.iterdir():
        if not repo_dir.is_dir():
            continue
        for gguf_file in repo_dir.glob('*.gguf'):
            try:
                size_bytes = gguf_file.stat().st_size
            except OSError:
                size_bytes = 0

            quant_label = _extract_quant_from_path(gguf_file)
            # Convert repo dir name back to repo_id
            repo_name = repo_dir.name.replace('--', '/')

            results.append({
                'filename': gguf_file.name,
                'quant': quant_label,
                'size_bytes': size_bytes,
                'size_gb': round(size_bytes / (1024 ** 3), 2),
                'path': str(gguf_file),
                'repo': repo_name,
            })

    # Sort by size descending (biggest models first)
    results.sort(key=lambda x: x['size_bytes'], reverse=True)
    return results
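
# Shape of one returned entry (hypothetical file and sizes):
#     {'filename': 'Qwen3-8B-Q4_K_M.gguf', 'quant': 'Q4_K_M',
#      'size_bytes': 5285453824, 'size_gb': 4.92,
#      'path': '/home/user/.hevolve/models/gguf/unsloth--Qwen3-8B-GGUF/Qwen3-8B-Q4_K_M.gguf',
#      'repo': 'unsloth/Qwen3-8B-GGUF'}
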

def list_available(model_name: str) -> List[Dict]:
    """List available GGUF files for a model on HuggingFace.

    Proxy to hf_model_resolver.list_available().

    Args:
        model_name: e.g. "Qwen/Qwen3-8B"

    Returns:
        List of dicts with keys: repo_id, filename, quant, quant_rank, size_bytes.
        On error: empty list.
    """
    resolver = _get_resolver()
    if resolver is None:
        return []
    try:
        return resolver.list_available(model_name)
    except ImportError as e:
        logger.warning(f"Cannot list available models: {e}")
        return []
    except Exception as e:
        logger.error(f"Error listing available models: {e}")
        return []


def remove_model(model_id: str) -> bool:
    """Remove a downloaded GGUF model and its catalog entry.

    Args:
        model_id: Either a catalog ID (e.g. "llm-qwen3-8b-q4-k-m") or
            a GGUF filename (e.g. "Qwen3-8B-Q4_K_M.gguf").

    Returns:
        True if something was removed, False otherwise.
    """
    global _active_model
    removed = False

    # If the active model matches, stop tracking it
    if _active_model and _active_model.get('catalog_id') == model_id:
        _active_model = None

    # Remove from catalog
    catalog = _get_catalog()
    if catalog:
        if catalog.unregister(model_id):
            removed = True

    # Try to find and delete the GGUF file
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if gguf_dir.exists():
        for gguf_file in gguf_dir.rglob('*.gguf'):
            # Match by filename or by catalog_id derived from filename
            if (gguf_file.name == model_id or
                    _make_catalog_id(
                        gguf_file.parent.name.replace('--', '/'),
                        _extract_quant_from_path(gguf_file)
                    ) == model_id):
                try:
                    gguf_file.unlink()
                    logger.info(f"Deleted GGUF file: {gguf_file}")
                    removed = True
                    # Clean up empty parent directory
                    try:
                        if not any(gguf_file.parent.iterdir()):
                            gguf_file.parent.rmdir()
                    except OSError:
                        pass
                except OSError as e:
                    logger.error(f"Failed to delete {gguf_file}: {e}")

    # Remove from storage manifest
    try:
        from integrations.service_tools.model_storage import model_storage
        # Check all gguf/* entries in the manifest
        manifest = model_storage.get_manifest()
        for tool_name in list(manifest.get('tools', {}).keys()):
            if tool_name.startswith('gguf/') and model_id in tool_name:
                model_storage.remove_tool(tool_name)
                removed = True
    except ImportError:
        pass

    if removed:
        logger.info(f"Removed model: {model_id}")
    else:
        logger.warning(f"Model not found for removal: {model_id}")

    return removed
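
# Both ID forms resolve to the same deletion (hypothetical IDs):
#     remove_model('llm-qwen3-8b-q4-k-m')      # catalog-ID form
#     remove_model('Qwen3-8B-Q4_K_M.gguf')     # filename form
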

def get_active_model() -> Optional[Dict]:
    """Return info about the currently running model, or None if nothing is active.

    Returns:
        Dict with keys: catalog_id, model, quant, endpoint, gguf_path, port,
        started_at, uptime_s.
    """
    if _active_model is None:
        return None

    result = dict(_active_model)
    result['uptime_s'] = round(time.time() - result.get('started_at', time.time()), 1)
    return result


def status() -> Dict:
    """Return full onboarding status: active model, server health, VRAM, downloads.

    Returns:
        Dict with keys: active_model, server_healthy, vram, downloaded_count,
        downloaded_size_gb.
    """
    result = {
        'active_model': get_active_model(),
        'server_healthy': False,
        'vram': {},
        'downloaded_count': 0,
        'downloaded_size_gb': 0.0,
    }

    # Server health check
    lcpp = _get_llamacpp_manager()
    if lcpp and _active_model:
        try:
            # Try to check if the server process is running
            healthy = lcpp.is_running() if hasattr(lcpp, 'is_running') else False
            result['server_healthy'] = healthy
        except Exception:
            result['server_healthy'] = False

    # VRAM info
    vm = _get_vram_manager()
    if vm:
        try:
            gpu_info = vm.detect_gpu()
            result['vram'] = {
                'gpu_name': gpu_info.get('name'),
                'total_gb': gpu_info.get('total_gb', 0.0),
                'free_gb': gpu_info.get('free_gb', 0.0),
                'cuda_available': gpu_info.get('cuda_available', False),
            }
        except Exception:
            pass

    # Downloaded models
    downloaded = list_downloaded()
    result['downloaded_count'] = len(downloaded)
    result['downloaded_size_gb'] = round(
        sum(m.get('size_gb', 0.0) for m in downloaded), 2
    )

    return result
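
# status() degrades gracefully: every probe above is wrapped so a missing
# backend or GPU just leaves the zeroed defaults in place.
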

# ── Registration helpers ────────────────────────────────────────────

def _register_in_catalog(catalog_id: str, model_name: str, quant: str,
                         gguf_path: Path, port: int) -> None:
    """Register or update a model entry in the ModelCatalog."""
    catalog = _get_catalog()
    if catalog is None:
        return

    try:
        from integrations.service_tools.model_catalog import ModelEntry, ModelType
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    # Calculate size for disk_gb
    try:
        disk_gb = round(gguf_path.stat().st_size / (1024 ** 3), 2)
    except OSError:
        disk_gb = 0.0

    entry = ModelEntry(
        id=catalog_id,
        name=f"{display_name} ({quant})",
        model_type=ModelType.LLM,
        source='huggingface',
        repo_id=model_name,
        files={'model': gguf_path.name, 'path': str(gguf_path)},
        disk_gb=disk_gb,
        backend='llama.cpp',
        supports_gpu=True,
        supports_cpu=True,
        quality_score=0.7,
        speed_score=0.8,
        cost_per_1k=0.0,
        tags=['local', 'gguf', 'onboarded', quant.lower()],
        capabilities={
            'quant': quant,
            'endpoint': f'http://127.0.0.1:{port}',
            'openai_compatible': True,
        },
    )

    catalog.register(entry)
    catalog.mark_downloaded(catalog_id)
    catalog.mark_loaded(catalog_id, device='gpu')
    logger.info(f"Registered {catalog_id} in model catalog")


def _register_in_registry(catalog_id: str, model_name: str, port: int) -> None:
    """Register a ModelBackend in the ModelRegistry for LLM routing."""
    registry = _get_model_registry()
    if registry is None:
        return

    try:
        from integrations.agent_engine.model_registry import ModelBackend, ModelTier
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    backend = ModelBackend(
        model_id=catalog_id,
        display_name=display_name,
        tier=ModelTier.FAST,
        config_list_entry={
            'model': catalog_id,
            'base_url': f'http://127.0.0.1:{port}/v1',
            'api_key': 'not-needed',
        },
        avg_latency_ms=500.0,
        accuracy_score=0.7,
        cost_per_1k_tokens=0.0,
        is_local=True,
        hardware_dependent=True,
    )

    registry.register(backend)
    logger.info(f"Registered {catalog_id} in model registry (tier=FAST, local)")
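
# The config_list_entry above targets llama-server's OpenAI-compatible /v1
# API, so agent frameworks that speak the OpenAI protocol can route to the
# local model without special-casing it.
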

# ── Flask Blueprint (lazy) ──────────────────────────────────────────

def _create_blueprint():
    """Create the Flask Blueprint for model onboarding API endpoints.

    Imports Flask lazily so this module can be imported without Flask
    being installed.
    """
    try:
        from flask import Blueprint, request, jsonify
    except ImportError:
        logger.debug("Flask not available — model_onboarding blueprint disabled")
        return None

    model_onboarding_bp = Blueprint('model_onboarding', __name__)

    @model_onboarding_bp.route('/api/models/onboard', methods=['POST'])
    def api_onboard():
        """Onboard a new model from HuggingFace.

        Body: {"model": "Qwen/Qwen3-8B", "quant": "auto", "port": 8080}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            p = data.get('port', 0)
            result = onboard(model, quant=q, port=int(p))

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API onboard error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/switch', methods=['POST'])
    def api_switch():
        """Switch the active model.

        Body: {"model": "meta-llama/Llama-3.1-8B", "quant": "auto"}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            result = switch_model(model, quant=q)

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API switch error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/available', methods=['GET'])
    def api_available():
        """List available GGUF files for a model on HuggingFace.

        Query: ?model=Qwen/Qwen3-8B
        """
        try:
            model = request.args.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" query param'}), 400

            results = list_available(model)
            return jsonify({'status': 'ok', 'models': results}), 200
        except Exception as e:
            logger.error(f"API available error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/status', methods=['GET'])
    def api_status():
        """Return full onboarding status."""
        try:
            return jsonify(status()), 200
        except Exception as e:
            logger.error(f"API status error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/checklist', methods=['GET'])
    def api_checklist():
        """T19: Model onboarding checklist — validates that all model types have entries.

        Returns a checklist of 8 model types with status (ok/missing/error)
        and the number of registered + downloaded models per type.
        """
        try:
            from integrations.service_tools.model_catalog import get_catalog
            catalog = get_catalog()
            MODEL_TYPES = ['llm', 'tts', 'stt', 'vlm', 'image_gen',
                           'video_gen', 'audio_gen', 'embedding']
            checklist = []
            for mt in MODEL_TYPES:
                # model_type may be a ModelType enum; compare on its value
                entries = [e for e in catalog.list_all()
                           if getattr(e.model_type, 'value', e.model_type) == mt]
                downloaded = [e for e in entries if e.downloaded]
                loaded = [e for e in entries if e.loaded]
                checklist.append({
                    'model_type': mt,
                    'status': 'ok' if entries else 'missing',
                    'registered': len(entries),
                    'downloaded': len(downloaded),
                    'loaded': len(loaded),
                    'models': [{'id': e.id, 'name': e.name,
                                'downloaded': e.downloaded, 'loaded': e.loaded}
                               for e in entries],
                })
            all_ok = all(c['status'] == 'ok' for c in checklist)
            return jsonify({
                'status': 'ok' if all_ok else 'incomplete',
                'checklist': checklist,
                'total_registered': sum(c['registered'] for c in checklist),
                'total_downloaded': sum(c['downloaded'] for c in checklist),
            }), 200
        except Exception as e:
            logger.error(f"API checklist error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/downloaded', methods=['GET'])
    def api_downloaded():
        """List all downloaded GGUF models."""
        try:
            models = list_downloaded()
            return jsonify({'status': 'ok', 'models': models}), 200
        except Exception as e:
            logger.error(f"API downloaded error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/<model_id>', methods=['DELETE'])
    def api_remove(model_id):
        """Remove a downloaded GGUF model.

        Path: /api/models/<model_id>
        """
        try:
            success = remove_model(model_id)
            if success:
                return jsonify({'status': 'ok', 'removed': model_id}), 200
            else:
                return jsonify({'status': 'error', 'error': f'Model {model_id} not found'}), 404
        except Exception as e:
            logger.error(f"API remove error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    return model_onboarding_bp
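
# Minimal client sketch for the onboard route (hypothetical Flask host/port):
#     import json, urllib.request
#     body = json.dumps({'model': 'Qwen/Qwen3-8B', 'quant': 'auto'}).encode()
#     req = urllib.request.Request(
#         'http://127.0.0.1:5000/api/models/onboard', data=body,
#         headers={'Content-Type': 'application/json'})
#     print(json.load(urllib.request.urlopen(req)))
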

# ── Module-level Blueprint accessor ─────────────────────────────────

_blueprint_instance = None
_blueprint_lock = threading.Lock()


def get_blueprint():
    """Get or create the model_onboarding Flask Blueprint.

    Returns None if Flask is not installed.
    """
    global _blueprint_instance
    if _blueprint_instance is None:
        with _blueprint_lock:
            if _blueprint_instance is None:
                _blueprint_instance = _create_blueprint()
    return _blueprint_instance


# Convenience alias for registration in hart_intelligence_entry.py:
#     from integrations.service_tools.model_onboarding import model_onboarding_bp
#     if model_onboarding_bp: app.register_blueprint(model_onboarding_bp)
model_onboarding_bp = get_blueprint()