Coverage for core/port_registry.py: 80.9%

230 statements

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Port Registry — Single source of truth for HART OS service ports. 

3 

4Two modes: 

5 APP MODE (default): User-space ports (6777+) for running alongside other apps 

6 OS MODE (NixOS): Privileged ports (<1024) for running as the operating system 

7 

8OS mode is activated when: 

9 - HART_OS_MODE=true environment variable is set, OR 

10 - /etc/os-release contains ID=hart-os (NixOS deployment) 

11 

12Privileged ports (<1024) require root/systemd, which is correct for OS daemons. 

13This frees ports 1024-65535 for user applications. 

14 

15Usage: 

16 from core.port_registry import get_port, get_all_ports, is_os_mode 

17 

18 port = get_port('backend') # 677 (OS mode) or 6777 (app mode) 

19 port = get_port('backend', 9999) # Override with specific port 

20""" 

import logging
import os
import socket
import time
from typing import Optional

logger = logging.getLogger('hevolve.ports')

# ── Port Definitions ──────────────────────────────────────────

# App mode: user-space ports (coexist with other software)
APP_PORTS = {
    'backend': 6777,
    'discovery': 6780,
    'vision': 9891,
    'llm': 8080,
    'websocket': 5460,
    'diarization': 8004,
    'stt_stream': 8005,
    'dlna_stream': 8554,
    'mesh_wg': 6795,
    'mesh_relay': 6796,
    'model_bus': 6790,
    'mcp': 6791,
    'vlm_caption': 8081,
    # Nunba Flask (the user-facing app server) + langchain GPT-API
    # sidecar. Previously hardcoded in core/health_probe.py — moved
    # here per #460 so the probes (and any future callers) walk the
    # canonical resolver instead of duplicating the literal port.
    'flask': 5000,
    'langchain': 6778,
}

# OS mode: privileged ports (HART OS is the operating system)
OS_PORTS = {
    'backend': 677,
    'discovery': 678,
    'vision': 989,
    'llm': 808,
    'websocket': 546,
    'diarization': 800,
    'stt_stream': 801,
    'dlna_stream': 855,
    'mesh_wg': 679,
    'mesh_relay': 680,
    'model_bus': 681,
    'mcp': 682,
    'vlm_caption': 809,
    'flask': 500,
    # 778 (not 677) to avoid colliding with backend=677 in OS mode.
    'langchain': 778,
}

# Environment variable overrides (take precedence over both modes)
ENV_OVERRIDES = {
    'backend': 'HARTOS_BACKEND_PORT',
    'discovery': 'HART_DISCOVERY_PORT',
    'vision': 'HART_VISION_PORT',
    'llm': 'HART_LLM_PORT',
    'websocket': 'HART_WS_PORT',
    'diarization': 'HEVOLVE_DIARIZATION_PORT',
    'stt_stream': 'HART_STT_STREAM_PORT',
    'dlna_stream': 'HART_DLNA_PORT',
    'mesh_wg': 'HART_MESH_WG_PORT',
    'mesh_relay': 'HART_MESH_RELAY_PORT',
    'model_bus': 'HART_MODEL_BUS_PORT',
    'mcp': 'HART_MCP_PORT',
    'vlm_caption': 'HEVOLVE_VLM_CAPTION_PORT',
    'flask': 'HART_FLASK_PORT',
    'langchain': 'HART_LANGCHAIN_PORT',
}


# ── Detection ─────────────────────────────────────────────────

_os_mode_cached = None


def is_os_mode() -> bool:
    """Detect if running as HART OS (the operating system).

    True when:
    - HART_OS_MODE=true env var, OR
    - /etc/os-release contains ID=hart-os (NixOS deployment)
    """
    global _os_mode_cached
    if _os_mode_cached is not None:
        return _os_mode_cached

    # Explicit env var
    if os.environ.get('HART_OS_MODE', '').lower() in ('true', '1', 'yes'):
        _os_mode_cached = True
        return True

    # NixOS detection: check /etc/os-release
    try:
        with open('/etc/os-release', 'r') as f:
            for line in f:
                if line.strip().startswith('ID=') and 'hart-os' in line:
                    _os_mode_cached = True
                    return True
    except (FileNotFoundError, PermissionError):
        pass

    _os_mode_cached = False
    return False
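

# A minimal illustrative sketch (not part of the module's API): flipping OS
# mode via the env var and watching get_port() switch tiers. The helper name
# is hypothetical, never called by HART OS code, and assumes a clean
# environment with no HART_* overrides set.
def _demo_mode_detection() -> None:
    global _os_mode_cached
    os.environ['HART_OS_MODE'] = 'true'
    _os_mode_cached = None                # drop the memoized verdict
    assert is_os_mode() is True
    assert get_port('backend') == 677     # privileged OS-mode port
    del os.environ['HART_OS_MODE']
    _os_mode_cached = None                # re-detect for real callers
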

# ── Port Resolution ───────────────────────────────────────────

def get_port(service: str, override: Optional[int] = None) -> int:
    """Get the port for a HART OS service.

    Resolution order:
    1. Explicit override parameter
    2. Environment variable (HARTOS_BACKEND_PORT, etc.)
    3. OS-mode port (if running as HART OS)
    4. App-mode port (default)

    Args:
        service: Service name ('backend', 'discovery', 'vision', etc.)
        override: Explicit port override (highest priority).

    Returns:
        Port number.
    """
    # 1. Explicit override
    if override is not None:
        return override

    # 2. Environment variable
    env_var = ENV_OVERRIDES.get(service)
    if env_var:
        env_val = os.environ.get(env_var)
        if env_val:
            try:
                return int(env_val)
            except ValueError:
                logger.warning(f"Invalid port in {env_var}={env_val}, using default")

    # 3. OS mode vs App mode
    if is_os_mode():
        return OS_PORTS.get(service, APP_PORTS.get(service, 0))

    return APP_PORTS.get(service, 0)
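

# Illustrative resolution walk (assumes app mode and a clean environment;
# the variable name comes from ENV_OVERRIDES above):
#
#     get_port('llm')            # → 8080, APP_PORTS default
#     os.environ['HART_LLM_PORT'] = '9001'
#     get_port('llm')            # → 9001, env var beats mode defaults
#     get_port('llm', 9999)      # → 9999, explicit override beats everything
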

def get_all_ports() -> dict:
    """Get all service ports as a dict."""
    return {service: get_port(service) for service in APP_PORTS}


def check_port_available(port: int, host: str = '0.0.0.0') -> bool:
    """Check if a port is available for binding.

    Note: SO_REUSEADDR is not set, so a port lingering in TIME_WAIT
    reports as unavailable; that conservative answer suits callers
    checking before they spawn a service.

    Args:
        port: Port number to check.
        host: Host to check on.

    Returns:
        True if port is available.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.settimeout(1)
        s.bind((host, port))
        return True
    except OSError:
        return False
    finally:
        s.close()
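

# Illustrative pre-flight (an assumed call site, not code from this module):
#
#     port = get_port('websocket')
#     if not check_port_available(port):
#         logger.warning("port %d busy; is the service already up?", port)
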

def get_mode_label() -> str:
    """Return 'OS' or 'APP' for display."""
    return 'OS' if is_os_mode() else 'APP'


# ── LLM URL Resolution ──────────────────────────────────────

_llm_url_cache: str = ''
_llm_url_cache_ts: float = 0.0
_LLM_URL_CACHE_TTL: float = 30.0  # seconds — re-resolve after this
_LLM_PROBE_TIMEOUT: float = 1.0  # seconds per candidate probe
_LLM_PROBE_NEG_TTL: float = 10.0  # seconds — probe verdicts (live and dead) cached this long
_llm_probe_cache: dict = {}  # url → (is_healthy, ts)
_llm_url_last_announced: str = ''  # for change-toast emission


def _probe_llm_endpoint(url: str) -> bool:
    """Cheap TCP-connect probe with short-lived result caching.

    True if something is listening on the URL's host:port. No HTTP,
    no body — just confirms the port is open. Both verdicts are cached
    for ``_LLM_PROBE_NEG_TTL`` seconds so repeated resolver calls don't
    re-probe dead candidates and burn 1s each time.

    Used by ``get_local_llm_url`` to walk candidate URLs and pick the
    first reachable one instead of returning the first non-empty config
    field and discovering it's dead at synth time. Exception path
    returns False — we never raise from a probe.
    """
    cached = _llm_probe_cache.get(url)
    if cached is not None:
        ok, ts = cached
        if (time.time() - ts) < _LLM_PROBE_NEG_TTL:
            return ok
    try:
        body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = body.partition(':')
        if port_s:
            port = int(port_s)
        else:
            # No explicit port: fall back to the scheme default so
            # remote URLs like https://host/v1 don't probe as dead.
            port = 443 if url.startswith('https://') else 80
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(_LLM_PROBE_TIMEOUT)
        try:
            ok = s.connect_ex((host or '127.0.0.1', port)) == 0
        finally:
            s.close()
    except Exception:
        ok = False
    _llm_probe_cache[url] = (ok, time.time())
    return ok
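

# Illustrative cost profile (assumes nothing is listening on this port,
# which is arbitrary and chosen only for the sketch):
#
#     _probe_llm_endpoint('http://127.0.0.1:65500/v1')  # up to ~1s: real probe
#     _probe_llm_endpoint('http://127.0.0.1:65500/v1')  # ~0s: cached verdict
#
# After _LLM_PROBE_NEG_TTL (10s) the next call probes again.
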

def _is_loopback_url(url: str) -> bool:
    """True iff the URL points at this machine. Used to gate auto-
    correct of stale config fields — never rewrite a non-loopback URL
    (could be a real remote endpoint the user explicitly chose)."""
    try:
        body = url.split('://', 1)[-1].split('/', 1)[0]
        if body.startswith('['):
            # Bracketed IPv6 literal, e.g. [::1]:8080; keep the brackets
            # so the membership test below can match '[::1]'.
            host = body.split(']', 1)[0] + ']'
        else:
            host = body.partition(':')[0]
        return host.lower() in ('127.0.0.1', 'localhost', '0.0.0.0', '[::1]', '')
    except Exception:
        return False


def _autocorrect_stale_loopback_config(healthy_url: str) -> None:
    """Rewrite drifted loopback URLs in ``~/.nunba/llama_config.json``
    so non-resolver readers (chat path's direct read of
    ``external_llm_endpoint.base_url``) also see the live URL.

    Safety rails:
    * Only touches fields whose CURRENT value is loopback — a real
      remote endpoint the user typed (e.g. a cloud OpenAI-compat URL)
      is preserved verbatim. The toast still fires; the user fixes
      externally.
    * Only writes if at least one field actually changed — no
      timestamp churn on the config file otherwise.
    * Never raises.

    This is the auto-correct the user asked for: when the resolver
    detects drift between two source-of-truth fields, the stale one
    gets healed in place so the next boot reads cleanly without an
    operator edit.
    """
    if not _is_loopback_url(healthy_url):
        return
    try:
        import json as _json
        cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if not os.path.isfile(cfg_path):
            return
        with open(cfg_path) as _f:
            cfg = _json.load(_f)
        changed = False
        # external_llm_endpoint — only auto-fix if its current value is
        # loopback (i.e. it was *intended* to point at this machine).
        ext = cfg.get('external_llm_endpoint') or {}
        ext_base = ext.get('base_url') or ''
        if ext_base and _is_loopback_url(ext_base) and ext_base != healthy_url:
            ext['base_url'] = healthy_url
            ext['completions'] = (
                healthy_url.rstrip('/').removesuffix('/v1')
                + '/v1/chat/completions'
            )
            cfg['external_llm_endpoint'] = ext
            changed = True
        # custom_api_base — same shape, host-portion of healthy URL.
        cab = cfg.get('custom_api_base') or ''
        healthy_host = healthy_url.rstrip('/').removesuffix('/v1')
        if cab and _is_loopback_url(cab) and cab != healthy_host:
            cfg['custom_api_base'] = healthy_host
            changed = True
        if changed:
            with open(cfg_path, 'w') as _f:
                _json.dump(cfg, _f, indent=2)
            logger.info(
                "[LLM URL] Auto-corrected stale loopback config to %s",
                healthy_url)
    except Exception as e:
        logger.debug("[LLM URL] auto-correct skipped: %s", e)
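

# Illustrative llama_config.json shape the auto-correct walks (field names as
# read above; the values here are made up):
#
#     {
#       "server_port": 8082,
#       "custom_api_base": "http://127.0.0.1:8082",
#       "external_llm_endpoint": {
#         "base_url": "http://127.0.0.1:8080/v1",
#         "completions": "http://127.0.0.1:8080/v1/chat/completions"
#       }
#     }
#
# With healthy_url='http://127.0.0.1:8082/v1', only external_llm_endpoint
# changes: its base_url is loopback and differs, while custom_api_base
# already matches the healthy host.
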

def _emit_llm_url_change_toast(new_url: str) -> None:
    """Best-effort WAMP toast when the resolved LLM URL changes.

    Fires once per actual transition; subsequent resolver calls that
    return the same URL are silent. Surfaces drift (config stale,
    llama-server moved port, external endpoint went down → fell back to
    bundled, etc.) to the user without log-spam. Never raises.
    """
    global _llm_url_last_announced
    if new_url == _llm_url_last_announced:
        return
    _llm_url_last_announced = new_url
    try:
        from core.realtime import publish_async as _wamp_pub
        _wamp_pub(
            'com.hertzai.hevolve.llm.endpoint_changed',
            {'url': new_url, 'reason': 'resolver fall-through'},
            timeout=0.3,
        )
    except Exception:
        pass


def get_local_draft_url() -> str:
    """Single source of truth for the local DRAFT LLM endpoint URL.

    The draft model is the Qwen3.5-0.8B instance that answers
    dispatch_draft_first calls and generates continuous video captions.

    On ≥8GB VRAM, draft runs on a SEPARATE port (8081) from the main
    model (8080) so both stay resident simultaneously.

    On ≤6GB VRAM (no separate draft server), the draft URL points to
    the MAIN model's port so the same model serves both roles — draft
    classification AND agentic responses. This avoids the "draft offline
    → fall through → slow main" latency penalty by letting the speculative
    dispatcher talk to whatever model IS running.

    Resolution order:
    1. HEVOLVE_DRAFT_LLM_URL — full URL override
    2. HEVOLVE_VLM_CAPTION_PORT — port override (separate draft server)
    3. If draft server is running on default port → use it
    4. Otherwise → fall back to main LLM URL (same model, dual role)

    Returns full URL with /v1 suffix (OpenAI-compatible).
    """
    url = os.environ.get('HEVOLVE_DRAFT_LLM_URL', '').strip()
    if not url:
        port = os.environ.get('HEVOLVE_VLM_CAPTION_PORT', '').strip()
        if not port:
            port = str(get_port('vlm_caption'))
        url = f'http://127.0.0.1:{port}/v1'

    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    # If draft port has no server, use the main LLM instead (single-model mode).
    # This makes the main model serve BOTH draft and agentic roles on low VRAM.
    try:
        _body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = _body.partition(':')
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(0.3)
        result = s.connect_ex((host or '127.0.0.1', int(port_s or 8081)))
        s.close()
        if result != 0:
            # Draft port not listening → use main model as draft
            return get_local_llm_url()
    except Exception:
        pass

    return url
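

# Illustrative outcomes, assuming no HEVOLVE_DRAFT_* overrides are set:
#
#     # Draft instance listening on 8081 (the vlm_caption default):
#     get_local_draft_url()   # → 'http://127.0.0.1:8081/v1'
#
#     # Nothing on 8081, main model on 8080: single-model fallback
#     get_local_draft_url()   # → get_local_llm_url(), e.g. '...:8080/v1'
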


def get_local_llm_url() -> str:
    """Single source of truth for the local LLM endpoint URL.

    Resolution order — every non-empty candidate is PROBED, and the
    first reachable one wins. Stale-but-configured URLs (e.g. wizard
    wrote 8080, llama-server later moved to 8082) no longer cause silent
    chat hangs. Probe results are cached for a short TTL so the walk
    is cheap on the chat hot path.

    Candidate order:
    1. HEVOLVE_LOCAL_LLM_URL — canonical env override
    2. CUSTOM_LLM_BASE_URL — user-provided custom endpoint
    3. LLAMA_CPP_PORT — deprecated port-only env var
    4. ~/.nunba/llama_config.json: external_llm_endpoint.base_url
       — wizard-recorded "external" (often actually a loopback,
       see drift bug 2026-04-29)
    5. ~/.nunba/llama_config.json: server_port
       — Nunba's auto-managed bundled server
    6. ~/.nunba/llama_config.json: custom_api_base
       — wizard mirror of server_port
    7. port_registry default — get_port('llm')

    On a successful resolve that DIFFERS from the previously-announced
    URL, the resolver:
    - Emits a WAMP toast so the user knows their LLM endpoint moved
    - Auto-corrects stale loopback fields in llama_config.json so
      non-resolver readers (chat path's raw external_llm_endpoint
      consumer) also see the live URL on next read
    - Updates HEVOLVE_LOCAL_LLM_URL env so other resolver-based
      callers in the same process see the fresh value immediately

    Cold-boot fallback: if no candidate is reachable (typical during
    the first ~30s of boot before llama-server has finished spawning),
    returns the highest-priority candidate URL anyway as a stable
    placeholder. Callers handle the "configured but not yet listening"
    case via their existing connection-error paths; the placeholder is
    correct *and* the call site doesn't need to special-case None.

    Returns:
        Full URL string, e.g. 'http://127.0.0.1:8082/v1'
    """
    global _llm_url_cache, _llm_url_cache_ts

    now = time.time()
    if _llm_url_cache and (now - _llm_url_cache_ts) < _LLM_URL_CACHE_TTL:
        return _llm_url_cache

    # Build the ordered candidate list — same sources as before plus
    # the wizard-recorded external_llm_endpoint.base_url (the field that
    # caused the 2026-04-29 drift incident). Empty strings are filtered
    # at the probe step.
    candidates: list = []

    candidates.append(os.environ.get('HEVOLVE_LOCAL_LLM_URL', ''))
    candidates.append(os.environ.get('CUSTOM_LLM_BASE_URL', ''))

    _port_env = os.environ.get('LLAMA_CPP_PORT', '')
    if _port_env:
        candidates.append(f'http://127.0.0.1:{_port_env}/v1')

    try:
        import json as _json
        _cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if os.path.isfile(_cfg_path):
            with open(_cfg_path) as _f:
                _cfg = _json.load(_f)
            _ext = (_cfg.get('external_llm_endpoint') or {}).get('base_url') or ''
            if _ext:
                candidates.append(_ext)
            _port = _cfg.get('server_port')
            if _port:
                candidates.append(f'http://127.0.0.1:{_port}/v1')
            _cab = _cfg.get('custom_api_base') or ''
            if _cab:
                candidates.append(_cab)
    except Exception:
        pass

    candidates.append(f'http://127.0.0.1:{get_port("llm")}/v1')

    # Normalize, dedupe (preserving order), filter invalids.
    seen: set = set()
    normalized: list = []
    for raw in candidates:
        if not raw:
            continue
        u = raw.rstrip('/')
        if not u.endswith('/v1'):
            u += '/v1'
        if not _validate_llm_url(u):
            continue
        if u in seen:
            continue
        seen.add(u)
        normalized.append(u)

    if not normalized:
        # Should be unreachable — the port_registry default is always
        # present — but stay defensive.
        normalized = [f'http://127.0.0.1:{get_port("llm")}/v1']

    # First reachable candidate wins. If none are reachable (cold boot
    # before llama-server spawns), return the highest-priority candidate
    # anyway as a stable placeholder.
    chosen = None
    for url in normalized:
        if _probe_llm_endpoint(url):
            chosen = url
            break
    if chosen is None:
        chosen = normalized[0]

    _llm_url_cache = chosen
    _llm_url_cache_ts = now

    # On a real transition (different URL than previously announced),
    # heal the drift so non-resolver readers also pick up the live URL,
    # surface the change to the user, and update env so any caller in
    # this process still on `os.environ.get('HEVOLVE_LOCAL_LLM_URL')`
    # sees the same answer.
    if chosen != _llm_url_last_announced:
        _emit_llm_url_change_toast(chosen)
        _autocorrect_stale_loopback_config(chosen)
        try:
            os.environ['HEVOLVE_LOCAL_LLM_URL'] = chosen
        except Exception:
            pass

    return chosen
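

# Illustrative walk (assumes the wizard-set port went stale and the bundled
# server is live on 8082; this mirrors the drift scenario the docstring
# describes — ports are examples only):
#
#     os.environ['HEVOLVE_LOCAL_LLM_URL'] = 'http://127.0.0.1:8080/v1'  # dead
#     get_local_llm_url()   # probes 8080 (dead), walks to the live
#                           # llama_config.json candidate, returns
#                           # 'http://127.0.0.1:8082/v1', fires the toast,
#                           # heals the config, and rewrites the env var
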

def set_local_llm_url(url: str) -> None:
    """Set the local LLM URL and propagate to env.

    Called by Nunba when:
    - start_server() detects/starts a server on a port
    - Port conflict causes reassignment to a new port
    - User provides a custom endpoint via the wizard

    Validates the URL, sets HEVOLVE_LOCAL_LLM_URL, and invalidates cache.
    """
    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    if not _validate_llm_url(url):
        logger.error(f"Refusing to set invalid LLM URL: {url}")
        return

    os.environ['HEVOLVE_LOCAL_LLM_URL'] = url
    invalidate_llm_url()
    logger.info(f"LLM URL set: {url}")

547 

548def invalidate_llm_url() -> None: 

549 """Clear the cached LLM URL. Call after port changes.""" 

550 global _llm_url_cache 

551 _llm_url_cache = '' 

552 

553 

554def is_local_llm() -> bool: 

555 """Check if the configured LLM is a local endpoint (zero cost). 

556 

557 Returns True if the resolved URL points to localhost/127.0.0.1, 

558 or if a local LLM model name is configured. 

559 """ 

560 if os.environ.get('HEVOLVE_LOCAL_LLM_MODEL'): 

561 return True 

562 url = get_local_llm_url() 

563 return any(h in url for h in ('localhost', '127.0.0.1', '0.0.0.0', '[::1]')) 

564 

565 

566def _validate_llm_url(url: str) -> bool: 

567 """Validate that a URL is well-formed for an LLM endpoint. 

568 

569 Checks: has scheme (http/https), has host, port is numeric if present. 

570 Does NOT check connectivity — that's a runtime concern. 

571 """ 

572 if not url: 

573 return False 

574 if not url.startswith(('http://', 'https://')): 

575 return False 

576 # Extract host:port portion 

577 try: 

578 after_scheme = url.split('://', 1)[1] 

579 host_port = after_scheme.split('/')[0] 

580 if ':' in host_port: 

581 host, port_str = host_port.rsplit(':', 1) 

582 if not host or not port_str.isdigit(): 

583 return False 

584 port = int(port_str) 

585 if port < 1 or port > 65535: 

586 return False 

587 elif not host_port: 

588 return False 

589 except (IndexError, ValueError): 

590 return False 

591 return True
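

# Illustrative accept/reject cases (hosts here are examples):
#
#     _validate_llm_url('http://127.0.0.1:8080/v1')    # True
#     _validate_llm_url('https://api.example.com/v1')  # True  (port optional)
#     _validate_llm_url('127.0.0.1:8080')              # False (no scheme)
#     _validate_llm_url('http://127.0.0.1:99999/v1')   # False (port > 65535)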