# integrations/service_tools/model_onboarding.py

1""" 

2Model Onboarding — one-command flow to go from model name to running inference. 

3 

4 onboard("Qwen/Qwen3-8B") 

5 # 1. Finds unsloth/Qwen3-8B-GGUF on HuggingFace 

6 # 2. Picks Q4_K_M quantization for user's GPU 

7 # 3. Downloads GGUF to ~/.hevolve/models/ 

8 # 4. Ensures llama.cpp binary is available 

9 # 5. Starts llama-server with optimal params 

10 # 6. Registers model in catalog + registry 

11 # 7. Returns endpoint URL ready for inference 

12 

13Also provides CLI/API for listing available models, switching active model, 

14and removing downloaded models. 

15""" 

import logging
import re
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)


# ── Nunba companion detection ──────────────────────────────────────

def _is_nunba_bundled() -> bool:
    """Detect if Nunba (the companion desktop app) is managing llama.cpp.

    When HARTOS is pip-installed inside Nunba, `hartos_backend_adapter` is
    in sys.modules. Nunba owns the llama.cpp lifecycle, model downloads,
    config.json, and port 8080. We should not duplicate that work.
    """
    import sys
    return 'hartos_backend_adapter' in sys.modules


def _onboard_via_nunba(model_name: str, quant: str, port: int) -> dict:
    """Onboard a model by delegating to Nunba's existing infrastructure.

    Nunba already has llama.cpp running on port 8080. We just need to
    tell it to load a different model via its adapter, or check whether
    the requested model is already active.
    """
    import urllib.error
    import urllib.request

    # Check if Nunba's llama.cpp is already running
    llm_port = 8080
    try:
        url = f'http://127.0.0.1:{llm_port}/health'
        with urllib.request.urlopen(url, timeout=3) as resp:
            if resp.status == 200:
                logger.info("Nunba's llama.cpp already running on port %d", llm_port)
                return {
                    'status': 'ready',
                    'model': model_name,
                    'quant': quant,
                    'endpoint': f'http://127.0.0.1:{llm_port}',
                    'source': 'nunba',
                    'note': 'Nunba manages the llama.cpp server. '
                            'Use Nunba settings to change models.',
                }
    except (urllib.error.URLError, OSError):
        pass

    # Nunba not running yet — tell the user
    return {
        'status': 'waiting',
        'model': model_name,
        'endpoint': f'http://127.0.0.1:{llm_port}',
        'source': 'nunba',
        'note': 'HARTOS is bundled with Nunba. Start the Nunba desktop app '
                'to activate llama.cpp inference. Nunba manages model lifecycle.',
    }
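
# Note: Nunba pins llama.cpp to port 8080, so _onboard_via_nunba ignores the
# caller's `port` argument by design (see the docstring above).
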

# ── Module-level state ──────────────────────────────────────────────

_onboard_lock = threading.Lock()
_active_model: Optional[Dict] = None  # tracks the currently running model


# ── Lazy imports (all behind try/except) ────────────────────────────

def _get_resolver():
    """Lazy-load HFModelResolver singleton."""
    try:
        from integrations.service_tools.hf_model_resolver import get_resolver
        return get_resolver()
    except ImportError:
        logger.warning("hf_model_resolver not available")
        return None


def _get_llamacpp_manager():
    """Lazy-load llamacpp_manager singleton."""
    try:
        from integrations.service_tools.llamacpp_manager import get_llamacpp_manager
        return get_llamacpp_manager()
    except ImportError:
        logger.warning("llamacpp_manager not available")
        return None


def _get_catalog():
    """Lazy-load ModelCatalog singleton."""
    try:
        from integrations.service_tools.model_catalog import get_catalog
        return get_catalog()
    except ImportError:
        logger.warning("model_catalog not available")
        return None


def _get_model_registry():
    """Lazy-load ModelRegistry singleton."""
    try:
        from integrations.agent_engine.model_registry import model_registry
        return model_registry
    except ImportError:
        logger.warning("model_registry not available")
        return None


def _get_vram_manager():
    """Lazy-load VRAMManager singleton."""
    try:
        from integrations.service_tools.vram_manager import vram_manager
        return vram_manager
    except ImportError:
        logger.warning("vram_manager not available")
        return None


def _get_default_port() -> int:
    """Get the llama.cpp port from port_registry, default 8080."""
    try:
        from core.port_registry import get_port
        return get_port('llm')
    except Exception:
        return 8080


def _make_catalog_id(model_name: str, quant: str) -> str:
    """Create a stable catalog ID from model name and quantization.

    E.g. "Qwen/Qwen3-8B" + "Q4_K_M" -> "llm-qwen3-8b-q4-k-m"
    """
    # Take basename from repo-style names
    if '/' in model_name:
        basename = model_name.split('/')[-1]
    else:
        basename = model_name
    slug = re.sub(r'[^a-z0-9]+', '-', basename.lower()).strip('-')
    quant_slug = re.sub(r'[^a-z0-9]+', '-', quant.lower()).strip('-')
    return f"llm-{slug}-{quant_slug}"
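
# Note: only the repo basename feeds the slug, so two repos that share a
# basename (e.g. hypothetical 'org-a/Foo-7B' and 'org-b/Foo-7B') collapse to
# the same catalog ID for a given quant.
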

def _extract_quant_from_path(gguf_path: Path) -> str:
    """Extract quantization label from a GGUF filename."""
    try:
        from integrations.service_tools.hf_model_resolver import _extract_quant
        q = _extract_quant(gguf_path.name)
        if q:
            return q
    except ImportError:
        pass
    # Fallback regex
    m = re.search(r'((?:IQ|Q)\d+(?:_K)?(?:_[A-Z0-9]+)?)', gguf_path.name, re.IGNORECASE)
    return m.group(1).upper() if m else 'unknown'
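
# Illustrative fallback parses (hypothetical filenames; the primary path is
# hf_model_resolver._extract_quant when it is importable):
#     _extract_quant_from_path(Path('Qwen3-8B-Q4_K_M.gguf'))  -> 'Q4_K_M'
#     _extract_quant_from_path(Path('model.IQ2_XS.gguf'))     -> 'IQ2_XS'
#     _extract_quant_from_path(Path('notes.gguf'))            -> 'unknown'
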

# ── Core functions ──────────────────────────────────────────────────

def onboard(model_name: str, quant: str = 'auto', port: int = 0) -> Dict:
    """Full onboarding pipeline: resolve, download, start, register.

    Args:
        model_name: HuggingFace model identifier (e.g. "Qwen/Qwen3-8B").
        quant: Quantization level ('Q4_K_M', 'Q8_0', etc.) or 'auto'.
        port: Port for llama-server. 0 = use the port registry default.

    Returns:
        Status dict with keys: status, model, quant, endpoint, gguf_path.
        On error: status='error', error=<message>.
    """
    global _active_model

    if port == 0:
        port = _get_default_port()

    # ── Nunba companion detection ──
    # When Nunba (sibling repo) is installed, it owns the llama.cpp lifecycle
    # and model management. We defer to it instead of duplicating.
    if _is_nunba_bundled():
        return _onboard_via_nunba(model_name, quant, port)

    with _onboard_lock:
        try:
            # Step 1: Resolve and download GGUF
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available. '
                             'Install huggingface_hub: pip install huggingface_hub',
                }

            logger.info(f"Onboarding {model_name} (quant={quant}, port={port})")
            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Step 2: Ensure llama.cpp binary is available
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            server_bin = lcpp.get_server_binary()
            if server_bin is None:
                logger.info("llama-server binary not found, downloading...")
                lcpp.download_server()
                server_bin = lcpp.get_server_binary()
                if server_bin is None:
                    return {
                        'status': 'error',
                        'error': 'Failed to obtain llama-server binary',
                    }

            # Step 3: Start llama-server
            logger.info(f"Starting llama-server on port {port}...")
            if not lcpp.start(str(gguf_path), port):
                return {
                    'status': 'error',
                    'error': f'llama-server failed to start on port {port}. '
                             'Check if the port is in use or the GGUF file is valid.',
                }

            # Step 4: Register in catalog
            catalog_id = _make_catalog_id(model_name, quant_used)
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)

            # Step 5: Register in model registry
            _register_in_registry(catalog_id, model_name, port)

            # Step 6: Track active model
            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Onboarding complete: {model_name} ({quant_used}) at {endpoint}")
            return result

        except FileNotFoundError as e:
            logger.error(f"Onboard failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except ImportError as e:
            logger.error(f"Onboard failed — missing dependency: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Onboard failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}
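
# Illustrative call (assumes huggingface_hub is installed and a llama.cpp
# build can be obtained for this machine):
#     result = onboard('Qwen/Qwen3-8B')        # quant='auto', registry port
#     if result['status'] == 'ready':
#         print(result['endpoint'])            # e.g. 'http://127.0.0.1:8080'
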

def switch_model(model_name: str, quant: str = 'auto') -> Dict:
    """Hot-swap the active model without full restart.

    Resolves and downloads if needed, then calls llamacpp_manager.swap_model().
    Updates catalog and registry entries.

    Args:
        model_name: HuggingFace model identifier.
        quant: Quantization level or 'auto'.

    Returns:
        Status dict. On error: status='error', error=<message>.
    """
    global _active_model

    with _onboard_lock:
        try:
            # Resolve + download
            resolver = _get_resolver()
            if resolver is None:
                return {
                    'status': 'error',
                    'error': 'hf_model_resolver is not available',
                }

            gguf_path = resolver.resolve(model_name, quant)
            quant_used = _extract_quant_from_path(gguf_path)

            # Swap model in running server
            lcpp = _get_llamacpp_manager()
            if lcpp is None:
                return {
                    'status': 'error',
                    'error': 'llamacpp_manager is not available',
                }

            logger.info(f"Swapping to {model_name} ({quant_used})...")
            lcpp.swap_model(str(gguf_path))

            # Unmark previous active model in catalog
            if _active_model:
                catalog = _get_catalog()
                if catalog:
                    catalog.mark_unloaded(_active_model.get('catalog_id', ''))

            # Register new model
            catalog_id = _make_catalog_id(model_name, quant_used)
            port = (_active_model or {}).get('port', _get_default_port())
            _register_in_catalog(catalog_id, model_name, quant_used, gguf_path, port)
            _register_in_registry(catalog_id, model_name, port)

            endpoint = f'http://127.0.0.1:{port}'
            _active_model = {
                'catalog_id': catalog_id,
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
                'port': port,
                'started_at': time.time(),
            }

            result = {
                'status': 'ready',
                'model': model_name,
                'quant': quant_used,
                'endpoint': endpoint,
                'gguf_path': str(gguf_path),
            }
            logger.info(f"Model swap complete: {model_name} ({quant_used})")
            return result

        except FileNotFoundError as e:
            logger.error(f"Switch failed — model not found: {e}")
            return {'status': 'error', 'error': str(e)}
        except Exception as e:
            logger.error(f"Switch failed: {e}", exc_info=True)
            return {'status': 'error', 'error': str(e)}
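
# Illustrative swap (the endpoint and port stay the same; only the model
# behind the running llama-server changes):
#     switch_model('meta-llama/Llama-3.1-8B', quant='Q4_K_M')
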

def list_downloaded() -> List[Dict]:
    """List all downloaded GGUF models with their sizes, quant types, and paths.

    Returns:
        List of dicts with keys: filename, quant, size_bytes, size_gb, path, repo.
    """
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if not gguf_dir.exists():
        return []

    results = []
    for repo_dir in gguf_dir.iterdir():
        if not repo_dir.is_dir():
            continue
        for gguf_file in repo_dir.glob('*.gguf'):
            try:
                size_bytes = gguf_file.stat().st_size
            except OSError:
                size_bytes = 0

            quant_label = _extract_quant_from_path(gguf_file)
            # Convert repo dir name back to repo_id
            repo_name = repo_dir.name.replace('--', '/')

            results.append({
                'filename': gguf_file.name,
                'quant': quant_label,
                'size_bytes': size_bytes,
                'size_gb': round(size_bytes / (1024 ** 3), 2),
                'path': str(gguf_file),
                'repo': repo_name,
            })

    # Sort by size descending (biggest models first)
    results.sort(key=lambda x: x['size_bytes'], reverse=True)
    return results
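
# Shape of one returned entry (hypothetical file and sizes):
#     {'filename': 'Qwen3-8B-Q4_K_M.gguf', 'quant': 'Q4_K_M',
#      'size_bytes': 5285453824, 'size_gb': 4.92,
#      'path': '/home/user/.hevolve/models/gguf/unsloth--Qwen3-8B-GGUF/Qwen3-8B-Q4_K_M.gguf',
#      'repo': 'unsloth/Qwen3-8B-GGUF'}
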

def list_available(model_name: str) -> List[Dict]:
    """List available GGUF files for a model on HuggingFace.

    Proxy to hf_model_resolver.list_available().

    Args:
        model_name: e.g. "Qwen/Qwen3-8B"

    Returns:
        List of dicts with keys: repo_id, filename, quant, quant_rank, size_bytes.
        On error: empty list.
    """
    resolver = _get_resolver()
    if resolver is None:
        return []
    try:
        return resolver.list_available(model_name)
    except ImportError as e:
        logger.warning(f"Cannot list available models: {e}")
        return []
    except Exception as e:
        logger.error(f"Error listing available models: {e}")
        return []


def remove_model(model_id: str) -> bool:
    """Remove a downloaded GGUF model and its catalog entry.

    Args:
        model_id: Either a catalog ID (e.g. "llm-qwen3-8b-q4-k-m") or
            a GGUF filename (e.g. "Qwen3-8B-Q4_K_M.gguf").

    Returns:
        True if something was removed, False otherwise.
    """
    global _active_model
    removed = False

    # If the active model matches, stop tracking it
    if _active_model and _active_model.get('catalog_id') == model_id:
        _active_model = None

    # Remove from catalog
    catalog = _get_catalog()
    if catalog:
        if catalog.unregister(model_id):
            removed = True

    # Try to find and delete the GGUF file
    gguf_dir = Path.home() / '.hevolve' / 'models' / 'gguf'
    if gguf_dir.exists():
        for gguf_file in gguf_dir.rglob('*.gguf'):
            # Match by filename or by catalog_id derived from filename
            if (gguf_file.name == model_id or
                    _make_catalog_id(
                        gguf_file.parent.name.replace('--', '/'),
                        _extract_quant_from_path(gguf_file)
                    ) == model_id):
                try:
                    gguf_file.unlink()
                    logger.info(f"Deleted GGUF file: {gguf_file}")
                    removed = True
                    # Clean up empty parent directory
                    try:
                        if not any(gguf_file.parent.iterdir()):
                            gguf_file.parent.rmdir()
                    except OSError:
                        pass
                except OSError as e:
                    logger.error(f"Failed to delete {gguf_file}: {e}")

    # Remove from storage manifest
    try:
        from integrations.service_tools.model_storage import model_storage
        # Check all gguf/* entries in the manifest
        manifest = model_storage.get_manifest()
        for tool_name in list(manifest.get('tools', {}).keys()):
            if tool_name.startswith('gguf/') and model_id in tool_name:
                model_storage.remove_tool(tool_name)
                removed = True
    except ImportError:
        pass

    if removed:
        logger.info(f"Removed model: {model_id}")
    else:
        logger.warning(f"Model not found for removal: {model_id}")

    return removed
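
# Both ID forms resolve to the same deletion (hypothetical IDs):
#     remove_model('llm-qwen3-8b-q4-k-m')      # catalog-ID form
#     remove_model('Qwen3-8B-Q4_K_M.gguf')     # filename form
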

def get_active_model() -> Optional[Dict]:
    """Return info about the currently running model, or None if nothing is active.

    Returns:
        Dict with keys: catalog_id, model, quant, endpoint, gguf_path, port,
        started_at, uptime_s.
    """
    if _active_model is None:
        return None

    result = dict(_active_model)
    result['uptime_s'] = round(time.time() - result.get('started_at', time.time()), 1)
    return result


def status() -> Dict:
    """Return full onboarding status: active model, server health, VRAM, downloads.

    Returns:
        Dict with keys: active_model, server_healthy, vram, downloaded_count,
        downloaded_size_gb.
    """
    result = {
        'active_model': get_active_model(),
        'server_healthy': False,
        'vram': {},
        'downloaded_count': 0,
        'downloaded_size_gb': 0.0,
    }

    # Server health check
    lcpp = _get_llamacpp_manager()
    if lcpp and _active_model:
        try:
            # Try to check if the server process is running
            healthy = lcpp.is_running() if hasattr(lcpp, 'is_running') else False
            result['server_healthy'] = healthy
        except Exception:
            result['server_healthy'] = False

    # VRAM info
    vm = _get_vram_manager()
    if vm:
        try:
            gpu_info = vm.detect_gpu()
            result['vram'] = {
                'gpu_name': gpu_info.get('name'),
                'total_gb': gpu_info.get('total_gb', 0.0),
                'free_gb': gpu_info.get('free_gb', 0.0),
                'cuda_available': gpu_info.get('cuda_available', False),
            }
        except Exception:
            pass

    # Downloaded models
    downloaded = list_downloaded()
    result['downloaded_count'] = len(downloaded)
    result['downloaded_size_gb'] = round(
        sum(m.get('size_gb', 0.0) for m in downloaded), 2
    )

    return result
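
# status() degrades gracefully: every probe above is wrapped so a missing
# backend or GPU just leaves the zeroed defaults in place.
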

# ── Registration helpers ────────────────────────────────────────────

def _register_in_catalog(catalog_id: str, model_name: str, quant: str,
                         gguf_path: Path, port: int) -> None:
    """Register or update a model entry in the ModelCatalog."""
    catalog = _get_catalog()
    if catalog is None:
        return

    try:
        from integrations.service_tools.model_catalog import ModelEntry, ModelType
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    # Calculate size for disk_gb
    try:
        disk_gb = round(gguf_path.stat().st_size / (1024 ** 3), 2)
    except OSError:
        disk_gb = 0.0

    entry = ModelEntry(
        id=catalog_id,
        name=f"{display_name} ({quant})",
        model_type=ModelType.LLM,
        source='huggingface',
        repo_id=model_name,
        files={'model': gguf_path.name, 'path': str(gguf_path)},
        disk_gb=disk_gb,
        backend='llama.cpp',
        supports_gpu=True,
        supports_cpu=True,
        quality_score=0.7,
        speed_score=0.8,
        cost_per_1k=0.0,
        tags=['local', 'gguf', 'onboarded', quant.lower()],
        capabilities={
            'quant': quant,
            'endpoint': f'http://127.0.0.1:{port}',
            'openai_compatible': True,
        },
    )

    catalog.register(entry)
    catalog.mark_downloaded(catalog_id)
    catalog.mark_loaded(catalog_id, device='gpu')
    logger.info(f"Registered {catalog_id} in model catalog")


def _register_in_registry(catalog_id: str, model_name: str, port: int) -> None:
    """Register a ModelBackend in the ModelRegistry for LLM routing."""
    registry = _get_model_registry()
    if registry is None:
        return

    try:
        from integrations.agent_engine.model_registry import ModelBackend, ModelTier
    except ImportError:
        return

    # Human-readable display name
    if '/' in model_name:
        display_name = model_name.split('/')[-1]
    else:
        display_name = model_name

    backend = ModelBackend(
        model_id=catalog_id,
        display_name=display_name,
        tier=ModelTier.FAST,
        config_list_entry={
            'model': catalog_id,
            'base_url': f'http://127.0.0.1:{port}/v1',
            'api_key': 'not-needed',
        },
        avg_latency_ms=500.0,
        accuracy_score=0.7,
        cost_per_1k_tokens=0.0,
        is_local=True,
        hardware_dependent=True,
    )

    registry.register(backend)
    logger.info(f"Registered {catalog_id} in model registry (tier=FAST, local)")
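
# The config_list_entry above targets llama-server's OpenAI-compatible /v1
# API, so agent frameworks that speak the OpenAI protocol can route to the
# local model without special-casing it.
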

# ── Flask Blueprint (lazy) ──────────────────────────────────────────

def _create_blueprint():
    """Create the Flask Blueprint for model onboarding API endpoints.

    Imports Flask lazily so this module can be imported without Flask
    being installed.
    """
    try:
        from flask import Blueprint, request, jsonify
    except ImportError:
        logger.debug("Flask not available — model_onboarding blueprint disabled")
        return None

    model_onboarding_bp = Blueprint('model_onboarding', __name__)

    @model_onboarding_bp.route('/api/models/onboard', methods=['POST'])
    def api_onboard():
        """Onboard a new model from HuggingFace.

        Body: {"model": "Qwen/Qwen3-8B", "quant": "auto", "port": 8080}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            p = data.get('port', 0)
            result = onboard(model, quant=q, port=int(p))

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API onboard error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/switch', methods=['POST'])
    def api_switch():
        """Switch the active model.

        Body: {"model": "meta-llama/Llama-3.1-8B", "quant": "auto"}
        """
        try:
            data = request.get_json(force=True, silent=True) or {}
            model = data.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" field'}), 400

            q = data.get('quant', 'auto')
            result = switch_model(model, quant=q)

            code = 200 if result.get('status') == 'ready' else 500
            return jsonify(result), code
        except Exception as e:
            logger.error(f"API switch error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/available', methods=['GET'])
    def api_available():
        """List available GGUF files for a model on HuggingFace.

        Query: ?model=Qwen/Qwen3-8B
        """
        try:
            model = request.args.get('model', '').strip()
            if not model:
                return jsonify({'status': 'error', 'error': 'Missing "model" query param'}), 400

            results = list_available(model)
            return jsonify({'status': 'ok', 'models': results}), 200
        except Exception as e:
            logger.error(f"API available error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/status', methods=['GET'])
    def api_status():
        """Return full onboarding status."""
        try:
            return jsonify(status()), 200
        except Exception as e:
            logger.error(f"API status error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/checklist', methods=['GET'])
    def api_checklist():
        """T19: Model onboarding checklist — validates that all model types have entries.

        Returns a checklist of 8 model types with status (ok/missing/error)
        and the number of registered + downloaded models per type.
        """
        try:
            from integrations.service_tools.model_catalog import get_catalog
            catalog = get_catalog()
            MODEL_TYPES = ['llm', 'tts', 'stt', 'vlm', 'image_gen',
                           'video_gen', 'audio_gen', 'embedding']
            checklist = []
            for mt in MODEL_TYPES:
                # model_type may be a ModelType enum; compare on its value
                entries = [e for e in catalog.list_all()
                           if getattr(e.model_type, 'value', e.model_type) == mt]
                downloaded = [e for e in entries if e.downloaded]
                loaded = [e for e in entries if e.loaded]
                checklist.append({
                    'model_type': mt,
                    'status': 'ok' if entries else 'missing',
                    'registered': len(entries),
                    'downloaded': len(downloaded),
                    'loaded': len(loaded),
                    'models': [{'id': e.id, 'name': e.name,
                                'downloaded': e.downloaded, 'loaded': e.loaded}
                               for e in entries],
                })
            all_ok = all(c['status'] == 'ok' for c in checklist)
            return jsonify({
                'status': 'ok' if all_ok else 'incomplete',
                'checklist': checklist,
                'total_registered': sum(c['registered'] for c in checklist),
                'total_downloaded': sum(c['downloaded'] for c in checklist),
            }), 200
        except Exception as e:
            logger.error(f"API checklist error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/downloaded', methods=['GET'])
    def api_downloaded():
        """List all downloaded GGUF models."""
        try:
            models = list_downloaded()
            return jsonify({'status': 'ok', 'models': models}), 200
        except Exception as e:
            logger.error(f"API downloaded error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    @model_onboarding_bp.route('/api/models/<model_id>', methods=['DELETE'])
    def api_remove(model_id):
        """Remove a downloaded GGUF model.

        Path: /api/models/<model_id>
        """
        try:
            success = remove_model(model_id)
            if success:
                return jsonify({'status': 'ok', 'removed': model_id}), 200
            else:
                return jsonify({'status': 'error', 'error': f'Model {model_id} not found'}), 404
        except Exception as e:
            logger.error(f"API remove error: {e}", exc_info=True)
            return jsonify({'status': 'error', 'error': str(e)}), 500

    return model_onboarding_bp
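
# Minimal client sketch for the onboard route (hypothetical Flask host/port):
#     import json, urllib.request
#     body = json.dumps({'model': 'Qwen/Qwen3-8B', 'quant': 'auto'}).encode()
#     req = urllib.request.Request(
#         'http://127.0.0.1:5000/api/models/onboard', data=body,
#         headers={'Content-Type': 'application/json'})
#     print(json.load(urllib.request.urlopen(req)))
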

# ── Module-level Blueprint accessor ─────────────────────────────────

_blueprint_instance = None
_blueprint_lock = threading.Lock()


def get_blueprint():
    """Get or create the model_onboarding Flask Blueprint.

    Returns None if Flask is not installed.
    """
    global _blueprint_instance
    if _blueprint_instance is None:
        with _blueprint_lock:
            if _blueprint_instance is None:
                _blueprint_instance = _create_blueprint()
    return _blueprint_instance


# Convenience alias for registration in hart_intelligence_entry.py:
#     from integrations.service_tools.model_onboarding import model_onboarding_bp
#     if model_onboarding_bp: app.register_blueprint(model_onboarding_bp)
model_onboarding_bp = get_blueprint()