Coverage for integrations/service_tools/model_onboarding.py: 17.9%
313 statements
coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2Model Onboarding — one-command flow to go from model name to running inference.
4 onboard("Qwen/Qwen3-8B")
5 # 1. Finds unsloth/Qwen3-8B-GGUF on HuggingFace
6 # 2. Picks Q4_K_M quantization for user's GPU
7 # 3. Downloads GGUF to ~/.hevolve/models/
8 # 4. Ensures llama.cpp binary is available
9 # 5. Starts llama-server with optimal params
10 # 6. Registers model in catalog + registry
11 # 7. Returns endpoint URL ready for inference
13Also provides CLI/API for listing available models, switching active model,
14and removing downloaded models.
15"""
import logging
import re
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)

# ── Nunba companion detection ──────────────────────────────────────

def _is_nunba_bundled() -> bool:
    """Detect if Nunba (companion desktop app) is managing llama.cpp.

    When HARTOS is pip-installed inside Nunba, `hartos_backend_adapter` is
    in sys.modules. Nunba owns: llama.cpp lifecycle, model downloads,
    config.json, port 8080. We should not duplicate that work.
    """
    import sys
    return 'hartos_backend_adapter' in sys.modules


def _onboard_via_nunba(model_name: str, quant: str, port: int) -> dict:
    """Onboard a model by delegating to Nunba's existing infrastructure.

    Nunba already has llama.cpp running on port 8080. We just need to
    tell it to load a different model via its adapter, or check if
    the requested model is already active.
    """
    import urllib.request
    import urllib.error

    # Check if Nunba's llama.cpp is already running
    llm_port = 8080
    try:
        req = urllib.request.urlopen(f'http://127.0.0.1:{llm_port}/health', timeout=3)
        if req.status == 200:
            logger.info("Nunba's llama.cpp already running on port %d", llm_port)
            return {
                'status': 'ready',
                'model': model_name,
                'quant': quant,
                'endpoint': f'http://127.0.0.1:{llm_port}',
                'source': 'nunba',
                'note': 'Nunba manages the llama.cpp server. '
                        'Use Nunba settings to change models.',
            }
    except (urllib.error.URLError, OSError):
        pass

    # Nunba not running yet — tell the user
    return {
        'status': 'waiting',
        'model': model_name,
        'endpoint': f'http://127.0.0.1:{llm_port}',
        'source': 'nunba',
        'note': 'HARTOS is bundled with Nunba. Start the Nunba desktop app '
                'to activate llama.cpp inference. Nunba manages model lifecycle.',
    }

# ── Module-level state ──────────────────────────────────────────────

_onboard_lock = threading.Lock()
_active_model: Optional[Dict] = None  # tracks the currently running model


# ── Lazy imports (all behind try/except) ────────────────────────────

def _get_resolver():
    """Lazy-load HFModelResolver singleton."""
    try:
        from integrations.service_tools.hf_model_resolver import get_resolver
        return get_resolver()
    except ImportError:
        logger.warning("hf_model_resolver not available")
        return None


def _get_llamacpp_manager():
    """Lazy-load llamacpp_manager singleton."""
    try:
        from integrations.service_tools.llamacpp_manager import get_llamacpp_manager
        return get_llamacpp_manager()
    except ImportError:
        logger.warning("llamacpp_manager not available")
        return None


def _get_catalog():
    """Lazy-load ModelCatalog singleton."""
    try:
        from integrations.service_tools.model_catalog import get_catalog
        return get_catalog()
    except ImportError:
        logger.warning("model_catalog not available")
        return None


def _get_model_registry():
    """Lazy-load ModelRegistry singleton."""
    try:
        from integrations.agent_engine.model_registry import model_registry
        return model_registry
    except ImportError:
        logger.warning("model_registry not available")
        return None


def _get_vram_manager():
    """Lazy-load VRAMManager singleton."""
    try:
        from integrations.service_tools.vram_manager import vram_manager
        return vram_manager
    except ImportError:
        logger.warning("vram_manager not available")
        return None


def _get_default_port() -> int:
    """Get the llama.cpp port from port_registry, default 8080."""
    try:
        from core.port_registry import get_port
        return get_port('llm')
    except Exception:
        return 8080


def _make_catalog_id(model_name: str, quant: str) -> str:
    """Create a stable catalog ID from model name and quantization.

    E.g. "Qwen/Qwen3-8B" + "Q4_K_M" -> "llm-qwen3-8b-q4-k-m"
    """
    # Take basename from repo-style names
    if '/' in model_name:
        basename = model_name.split('/')[-1]
    else:
        basename = model_name
    slug = re.sub(r'[^a-z0-9]+', '-', basename.lower()).strip('-')
    quant_slug = re.sub(r'[^a-z0-9]+', '-', quant.lower()).strip('-')
    return f"llm-{slug}-{quant_slug}"


def _extract_quant_from_path(gguf_path: Path) -> str:
    """Extract quantization label from a GGUF filename."""
    try:
        from integrations.service_tools.hf_model_resolver import _extract_quant
        q = _extract_quant(gguf_path.name)
        if q:
            return q
    except ImportError:
        pass
    # Fallback regex
    m = re.search(r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)', gguf_path.name, re.IGNORECASE)
    return m.group(1).upper() if m else 'unknown'
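# Hedged example (illustrative only): what the fallback regex above yields when
# hf_model_resolver is unavailable. The filenames below are made up, not taken
# from the codebase.
#
#     _extract_quant_from_path(Path("Qwen3-8B-Q4_K_M.gguf"))   # -> "Q4_K_M"
#     _extract_quant_from_path(Path("model-IQ4_XS.gguf"))      # -> "IQ4_XS"
#     _extract_quant_from_path(Path("model.gguf"))             # -> "unknown"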

# ── Core functions ──────────────────────────────────────────────────

def onboard(model_name: str, quant: str = 'auto', port: int = 0) -> Dict:
    """Full onboarding pipeline: resolve, download, start, register.

    Args:
        model_name: HuggingFace model identifier (e.g. "Qwen/Qwen3-8B").
        quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'.
        port: Port for llama-server. 0 = use port registry default.

    Returns:
        Status dict with keys: status, model, quant, endpoint, gguf_path.
        On error: status='error', error=<message>.
    """
    global _active_model

    if port == 0:
        port = _get_default_port()

    # ── Nunba companion detection ──
    # When Nunba (sibling repo) is installed, it owns the llama.cpp lifecycle
    # and model management. We defer to it instead of duplicating.
    if _is_nunba_bundled():
        return _onboard_via_nunba(model_name, quant, port)

    with _onboard_lock:
        try:
            # Step 1: Resolve and download GGUF
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available. '
                             'Install huggingface_hub: pip install huggingface_hub',
                }

            logger.info(f"Onboarding {model_name} (quant={quant}, port={port})")
            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Step 2: Ensure llama.cpp binary is available
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            server_bin = lcpp.get_server_binary()
            if server_bin is None:
                logger.info("llama-server binary not found, downloading...")
                lcpp.download_server()
                server_bin = lcpp.get_server_binary()
                if server_bin is None:
                    return {
                        'status': 'error',
                        'error': 'Failed to obtain llama-server binary',
                    }

            # Step 3: Start llama-server
            logger.info(f"Starting llama-server on port {port}...")
            if not lcpp.start(str(gguf_path), port):
                return {
                    'status': 'error',
                    'error': f'llama-server failed to start on port {port}. '
                             f'Check if the port is in use or the GGUF file is valid.',
                }

            # Step 4: Register in catalog
            catalog_id = _make_catalog_id(model_name, quant_used)
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)

            # Step 5: Register in model registry
            _register_in_registry(catalog_id, model_name, port)

            # Step 6: Track active model
            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Onboarding complete: {model_name} ({quant_used}) at {endpoint}")
            return result

        except FileNotFoundError as e:
            logger.error(f"Onboard failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except ImportError as e:
            logger.error(f"Onboard failed — missing dependency: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Onboard failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}
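# Hedged sketch (illustrative, not part of the module): verifying the endpoint
# returned by onboard() the same way _onboard_via_nunba() probes Nunba's server,
# i.e. via llama-server's /health route. The model name is just an example.
#
#     import urllib.request
#     info = onboard("Qwen/Qwen3-8B", quant="Q4_K_M")
#     if info["status"] == "ready":
#         with urllib.request.urlopen(f"{info['endpoint']}/health", timeout=3) as resp:
#             print("llama-server health:", resp.status)   # 200 once the model is loaded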

def switch_model(model_name: str, quant: str = 'auto') -> Dict:
    """Hot-swap the active model without full restart.

    Resolves and downloads if needed, then calls llamacpp_manager.swap_model().
    Updates catalog and registry entries.

    Args:
        model_name: HuggingFace model identifier.
        quant: Quantization level or 'auto'.

    Returns:
        Status dict. On error: status='error', error=<message>.
    """
    global _active_model

    with _onboard_lock:
        try:
            # Resolve + download
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available',
                }

            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Swap model in running server
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            logger.info(f"Swapping to {model_name} ({quant_used})...")
            lcpp.swap_model(str(gguf_path))

            # Unmark previous active model in catalog
            if _active_model:
                catalog = _get_catalog()
                if catalog:
                    catalog.mark_unloaded(_active_model.get('catalog_id', ''))

            # Register new model
            catalog_id = _make_catalog_id(model_name, quant_used)
            port = (_active_model or {}).get('port', _get_default_port())
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)
            _register_in_registry(catalog_id, model_name, port)

            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Model swap complete: {model_name} ({quant_used})")
            return result

        except FileNotFoundError as e:
            logger.error(f"Switch failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Switch failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}


def list_downloaded() -> List[Dict]:
    """List all downloaded GGUF models with their sizes, quant types, and paths.

    Returns:
        List of dicts with keys: filename, quant, size_bytes, size_gb, path, repo.
    """
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if not gguf_dir.exists():
        return []

    results = []
    for repo_dir in gguf_dir.iterdir():
        if not repo_dir.is_dir():
            continue
        for gguf_file in repo_dir.glob('*.gguf'):
            try:
                size_bytes = gguf_file.stat().st_size
            except OSError:
                size_bytes = 0

            quant_label = _extract_quant_from_path(gguf_file)
            # Convert repo dir name back to repo_id
            repo_name = repo_dir.name.replace('--', '/')

            results.append({
                'filename': gguf_file.name,
                'quant': quant_label,
                'size_bytes': size_bytes,
                'size_gb': round(size_bytes / (1024 ** 3), 2),
                'path': str(gguf_file),
                'repo': repo_name,
            })

    # Sort by size descending (biggest models first)
    results.sort(key=lambda x: x['size_bytes'], reverse=True)
    return results
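# Hedged example of a single list_downloaded() record. The keys are the ones
# built above; the values (filename, size, home path) are purely illustrative.
#
#     {
#         'filename': 'Qwen3-8B-Q4_K_M.gguf',
#         'quant': 'Q4_K_M',
#         'size_bytes': 5029953536,
#         'size_gb': 4.68,
#         'path': '/home/user/.hevolve/models/gguf/unsloth--Qwen3-8B-GGUF/Qwen3-8B-Q4_K_M.gguf',
#         'repo': 'unsloth/Qwen3-8B-GGUF',
#     }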

def list_available(model_name: str) -> List[Dict]:
    """List available GGUF files for a model on HuggingFace.

    Proxy to hf_model_resolver.list_available().

    Args:
        model_name: e.g. "Qwen/Qwen3-8B"

    Returns:
        List of dicts with keys: repo_id, filename, quant, quant_rank, size_bytes.
        On error: empty list.
    """
    resolver = _get_resolver()
    if resolver is None:
        return []
    try:
        return resolver.list_available(model_name)
    except ImportError as e:
        logger.warning(f"Cannot list available models: {e}")
        return []
    except Exception as e:
        logger.error(f"Error listing available models: {e}")
        return []


def remove_model(model_id: str) -> bool:
    """Remove a downloaded GGUF model and its catalog entry.

    Args:
        model_id: Either a catalog ID (e.g. "llm-qwen3-8b-q4-k-m") or
            a GGUF filename (e.g. "Qwen3-8B-Q4_K_M.gguf").

    Returns:
        True if something was removed, False otherwise.
    """
    global _active_model
    removed = False

    # If active model matches, stop tracking
    if _active_model and _active_model.get('catalog_id') == model_id:
        _active_model = None

    # Remove from catalog
    catalog = _get_catalog()
    if catalog:
        if catalog.unregister(model_id):
            removed = True

    # Try to find and delete the GGUF file
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if gguf_dir.exists():
        for gguf_file in gguf_dir.rglob('*.gguf'):
            # Match by filename or by catalog_id derived from filename
            if (gguf_file.name == model_id or
                    _make_catalog_id(
                        gguf_file.parent.name.replace('--', '/'),
                        _extract_quant_from_path(gguf_file)
                    ) == model_id):
                try:
                    gguf_file.unlink()
                    logger.info(f"Deleted GGUF file: {gguf_file}")
                    removed = True
                    # Clean up empty parent directory
                    try:
                        if not any(gguf_file.parent.iterdir()):
                            gguf_file.parent.rmdir()
                    except OSError:
                        pass
                except OSError as e:
                    logger.error(f"Failed to delete {gguf_file}: {e}")

    # Remove from storage manifest
    try:
        from integrations.service_tools.model_storage import model_storage
        # Check all gguf/* entries in the manifest
        manifest = model_storage.get_manifest()
        for tool_name in list(manifest.get('tools', {}).keys()):
            if tool_name.startswith('gguf/') and model_id in tool_name:
                model_storage.remove_tool(tool_name)
                removed = True
    except ImportError:
        pass

    if removed:
        logger.info(f"Removed model: {model_id}")
    else:
        logger.warning(f"Model not found for removal: {model_id}")

    return removed
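# Hedged usage sketch (illustrative): the two identifier forms remove_model()
# accepts, per its docstring. The IDs below are examples, not real entries.
#
#     remove_model("llm-qwen3-8b-q4-k-m")     # by catalog ID
#     remove_model("Qwen3-8B-Q4_K_M.gguf")    # by GGUF filename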

def get_active_model() -> Optional[Dict]:
    """Return info about the currently running model, or None if nothing is active.

    Returns:
        Dict with keys: catalog_id, model, quant, endpoint, gguf_path, port,
        started_at, uptime_s.
    """
    if _active_model is None:
        return None

    result = dict(_active_model)
    result['uptime_s'] = round(time.time() - result.get('started_at', time.time()), 1)
    return result


def status() -> Dict:
    """Return full onboarding status: active model, server health, VRAM, downloads.

    Returns:
        Dict with keys: active_model, server_healthy, vram, downloaded_count,
        downloaded_size_gb.
    """
    result = {
        'active_model': get_active_model(),
        'server_healthy': False,
        'vram': {},
        'downloaded_count': 0,
        'downloaded_size_gb': 0.0,
    }

    # Server health check
    lcpp = _get_llamacpp_manager()
    if lcpp and _active_model:
        try:
            # Try to check if the server process is running
            healthy = lcpp.is_running() if hasattr(lcpp, 'is_running') else False
            result['server_healthy'] = healthy
        except Exception:
            result['server_healthy'] = False

    # VRAM info
    vm = _get_vram_manager()
    if vm:
        try:
            gpu_info = vm.detect_gpu()
            result['vram'] = {
                'gpu_name': gpu_info.get('name'),
                'total_gb': gpu_info.get('total_gb', 0.0),
                'free_gb': gpu_info.get('free_gb', 0.0),
                'cuda_available': gpu_info.get('cuda_available', False),
            }
        except Exception:
            pass

    # Downloaded models
    downloaded = list_downloaded()
    result['downloaded_count'] = len(downloaded)
    result['downloaded_size_gb'] = round(
        sum(m.get('size_gb', 0.0) for m in downloaded), 2
    )

    return result


# ── Registration helpers ────────────────────────────────────────────

def _register_in_catalog(catalog_id: str, model_name: str, quant: str,
                         gguf_path: Path, port: int) -> None:
    """Register or update a model entry in the ModelCatalog."""
    catalog = _get_catalog()
    if catalog is None:
        return

    try:
        from integrations.service_tools.model_catalog import ModelEntry, ModelType
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    # Calculate size for disk_gb
    try:
        disk_gb = round(gguf_path.stat().st_size / (1024 ** 3), 2)
    except OSError:
        disk_gb = 0.0

    entry = ModelEntry(
        id=catalog_id,
        name=f"{display_name} ({quant})",
        model_type=ModelType.LLM,
        source='huggingface',
        repo_id=model_name,
        files={'model': gguf_path.name, 'path': str(gguf_path)},
        disk_gb=disk_gb,
        backend='llama.cpp',
        supports_gpu=True,
        supports_cpu=True,
        quality_score=0.7,
        speed_score=0.8,
        cost_per_1k=0.0,
        tags=['local', 'gguf', 'onboarded', quant.lower()],
        capabilities={
            'quant': quant,
            'endpoint': f'http://127.0.0.1:{port}',
            'openai_compatible': True,
        },
    )

    catalog.register(entry)
    catalog.mark_downloaded(catalog_id)
    catalog.mark_loaded(catalog_id, device='gpu')
    logger.info(f"Registered {catalog_id} in model catalog")


def _register_in_registry(catalog_id: str, model_name: str, port: int) -> None:
    """Register a ModelBackend in the ModelRegistry for LLM routing."""
    registry = _get_model_registry()
    if registry is None:
        return

    try:
        from integrations.agent_engine.model_registry import ModelBackend, ModelTier
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    backend = ModelBackend(
        model_id=catalog_id,
        display_name=display_name,
        tier=ModelTier.FAST,
        config_list_entry={
            'model': catalog_id,
            'base_url': f'http://127.0.0.1:{port}/v1',
            'api_key': 'not-needed',
        },
        avg_latency_ms=500.0,
        accuracy_score=0.7,
        cost_per_1k_tokens=0.0,
        is_local=True,
        hardware_dependent=True,
    )

    registry.register(backend)
    logger.info(f"Registered {catalog_id} in model registry (tier=FAST, local)")
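# Hedged sketch: the config_list_entry registered above targets llama-server's
# OpenAI-compatible /v1 API (the catalog entry also sets openai_compatible=True),
# so a client could talk to it directly. Assumes the `openai` package is
# installed; the port and catalog ID below are examples. Illustrative only.
#
#     from openai import OpenAI
#     client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="not-needed")
#     reply = client.chat.completions.create(
#         model="llm-qwen3-8b-q4-k-m",          # the registered catalog_id
#         messages=[{"role": "user", "content": "Hello"}],
#     )
#     print(reply.choices[0].message.content)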

# ── Flask Blueprint (lazy) ──────────────────────────────────────────

def _create_blueprint():
    """Create the Flask Blueprint for model onboarding API endpoints.

    Imports Flask lazily so this module can be imported without Flask
    being installed.
    """
    try:
        from flask import Blueprint, request, jsonify
    except ImportError:
        logger.debug("Flask not available — model_onboarding blueprint disabled")
        return None

    model_onboarding_bp = Blueprint('model_onboarding', __name__)

    @model_onboarding_bp.route('/api/models/onboard', methods=['POST'])
    def api_onboard():
        """Onboard a new model from HuggingFace.

        Body: {"model": "Qwen/Qwen3-8B", "quant": "auto", "port": 8080}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            p = data.get('port', 0)
            result = onboard(model, quant=q, port=int(p))

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API onboard error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/switch', methods=['POST'])
    def api_switch():
        """Switch the active model.

        Body: {"model": "meta-llama/Llama-3.1-8B", "quant": "auto"}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            result = switch_model(model, quant=q)

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API switch error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/available', methods=['GET'])
    def api_available():
        """List available GGUF files for a model on HuggingFace.

        Query: ?model=Qwen/Qwen3-8B
        """
        try:
            model = request.args.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" query param'}), 400

            results = list_available(model)
            return jsonify({'status': 'ok', 'models': results}), 200
        except Exception as e:
            logger.error(f"API available error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/status', methods=['GET'])
    def api_status():
        """Return full onboarding status."""
        try:
            return jsonify(status()), 200
        except Exception as e:
            logger.error(f"API status error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/checklist', methods=['GET'])
    def api_checklist():
        """T19: Model onboarding checklist — validates all model types have entries.

        Returns a checklist of 8 model types with status (ok/missing/error)
        and the number of registered + downloaded models per type.
        """
        try:
            from .model_catalog import get_catalog
            catalog = get_catalog()
            MODEL_TYPES = ['llm', 'tts', 'stt', 'vlm', 'image_gen',
                           'video_gen', 'audio_gen', 'embedding']
            checklist = []
            for mt in MODEL_TYPES:
                entries = [e for e in catalog.list_all() if e.model_type == mt]
                downloaded = [e for e in entries if e.downloaded]
                loaded = [e for e in entries if e.loaded]
                checklist.append({
                    'model_type': mt,
                    'status': 'ok' if entries else 'missing',
                    'registered': len(entries),
                    'downloaded': len(downloaded),
                    'loaded': len(loaded),
                    'models': [{'id': e.model_id, 'name': e.display_name,
                                'downloaded': e.downloaded, 'loaded': e.loaded}
                               for e in entries],
                })
            all_ok = all(c['status'] == 'ok' for c in checklist)
            return jsonify({
                'status': 'ok' if all_ok else 'incomplete',
                'checklist': checklist,
                'total_registered': sum(c['registered'] for c in checklist),
                'total_downloaded': sum(c['downloaded'] for c in checklist),
            }), 200
        except Exception as e:
            logger.error(f"API checklist error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/downloaded', methods=['GET'])
    def api_downloaded():
        """List all downloaded GGUF models."""
        try:
            models = list_downloaded()
            return jsonify({'status': 'ok', 'models': models}), 200
        except Exception as e:
            logger.error(f"API downloaded error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/<model_id>', methods=['DELETE'])
    def api_remove(model_id):
        """Remove a downloaded GGUF model.

        Path: /api/models/<model_id>
        """
        try:
            success = remove_model(model_id)
            if success:
                return jsonify({'status': 'ok', 'removed': model_id}), 200
            else:
                return jsonify({'status': 'error', 'error': f'Model {model_id} not found'}), 404
        except Exception as e:
            logger.error(f"API remove error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    return model_onboarding_bp


# ── Module-level Blueprint accessor ─────────────────────────────────

_blueprint_instance = None
_blueprint_lock = threading.Lock()


def get_blueprint():
    """Get or create the model_onboarding Flask Blueprint.

    Returns None if Flask is not installed.
    """
    global _blueprint_instance
    if _blueprint_instance is None:
        with _blueprint_lock:
            if _blueprint_instance is None:
                _blueprint_instance = _create_blueprint()
    return _blueprint_instance


# Convenience alias for registration in hart_intelligence_entry.py:
#   from integrations.service_tools.model_onboarding import model_onboarding_bp
#   if model_onboarding_bp: app.register_blueprint(model_onboarding_bp)
model_onboarding_bp = get_blueprint()
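
# Hedged sketch (illustrative, not part of the module): registering the blueprint
# in a host Flask app and exercising one route via Flask's test client. The app
# object and host setup are assumptions; only the route paths come from above.
#
#     from flask import Flask
#     from integrations.service_tools.model_onboarding import get_blueprint
#
#     app = Flask(__name__)
#     bp = get_blueprint()
#     if bp is not None:          # get_blueprint() returns None only without Flask
#         app.register_blueprint(bp)
#
#     with app.test_client() as client:
#         resp = client.get('/api/models/status')
#         print(resp.status_code, resp.get_json())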