Coverage for core/port_registry.py: 80.9%

230 statements

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Port Registry — Single source of truth for HART OS service ports. 

3 

4Two modes: 

5 APP MODE (default): User-space ports (6777+) for running alongside other apps 

6 OS MODE (NixOS): Privileged ports (<1024) for running as the operating system 

7 

8OS mode is activated when: 

9 - HART_OS_MODE=true environment variable is set, OR 

10 - /etc/os-release contains ID=hart-os (NixOS deployment) 

11 

12Privileged ports (<1024) require root/systemd, which is correct for OS daemons. 

13This frees ports 1024-65535 for user applications. 

14 

15Usage: 

16 from core.port_registry import get_port, get_all_ports, is_os_mode 

17 

18 port = get_port('backend') # 677 (OS mode) or 6777 (app mode) 

19 port = get_port('backend', 9999) # Override with specific port 

20""" 

import logging
import os
import socket
import time
from typing import Optional

logger = logging.getLogger('hevolve.ports')

# ── Port Definitions ──────────────────────────────────────────

# App mode: user-space ports (coexist with other software)
APP_PORTS = {
    'backend': 6777,
    'discovery': 6780,
    'vision': 9891,
    'llm': 8080,
    'websocket': 5460,
    'diarization': 8004,
    'stt_stream': 8005,
    'dlna_stream': 8554,
    'mesh_wg': 6795,
    'mesh_relay': 6796,
    'model_bus': 6790,
    'mcp': 6791,
    'vlm_caption': 8081,
    # Nunba Flask (the user-facing app server) + langchain GPT-API
    # sidecar. Previously hardcoded in core/health_probe.py — moved
    # here per #460 so the probes (and any future callers) walk the
    # canonical resolver instead of duplicating the literal port.
    'flask': 5000,
    'langchain': 6778,
}

# OS mode: privileged ports (HART OS is the operating system)
OS_PORTS = {
    'backend': 677,
    'discovery': 678,
    'vision': 989,
    'llm': 808,
    'websocket': 546,
    'diarization': 800,
    'stt_stream': 801,
    'dlna_stream': 855,
    'mesh_wg': 679,
    'mesh_relay': 680,
    'model_bus': 681,
    'mcp': 682,
    'vlm_caption': 809,
    'flask': 500,
    # 778 (not 677) to avoid colliding with backend=677 in OS mode.
    'langchain': 778,
}

# Environment variable overrides (take precedence over both modes)
ENV_OVERRIDES = {
    'backend': 'HARTOS_BACKEND_PORT',
    'discovery': 'HART_DISCOVERY_PORT',
    'vision': 'HART_VISION_PORT',
    'llm': 'HART_LLM_PORT',
    'websocket': 'HART_WS_PORT',
    'diarization': 'HEVOLVE_DIARIZATION_PORT',
    'stt_stream': 'HART_STT_STREAM_PORT',
    'dlna_stream': 'HART_DLNA_PORT',
    'mesh_wg': 'HART_MESH_WG_PORT',
    'mesh_relay': 'HART_MESH_RELAY_PORT',
    'model_bus': 'HART_MODEL_BUS_PORT',
    'mcp': 'HART_MCP_PORT',
    'vlm_caption': 'HEVOLVE_VLM_CAPTION_PORT',
    'flask': 'HART_FLASK_PORT',
    'langchain': 'HART_LANGCHAIN_PORT',
}


# ── Detection ─────────────────────────────────────────────────

_os_mode_cached = None


def is_os_mode() -> bool:
    """Detect if running as HART OS (the operating system).

    True when:
    - HART_OS_MODE=true env var, OR
    - /etc/os-release contains ID=hart-os (NixOS deployment)
    """
    global _os_mode_cached
    if _os_mode_cached is not None:
        return _os_mode_cached

    # Explicit env var
    if os.environ.get('HART_OS_MODE', '').lower() in ('true', '1', 'yes'):
        _os_mode_cached = True
        return True

    # NixOS detection: check /etc/os-release
    try:
        with open('/etc/os-release', 'r') as f:
            for line in f:
                if line.strip().startswith('ID=') and 'hart-os' in line:
                    _os_mode_cached = True
                    return True
    except (FileNotFoundError, PermissionError):
        pass

    _os_mode_cached = False
    return False
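

# A minimal illustrative sketch (not part of the module's API): flipping OS
# mode via the env var and watching get_port() switch tiers. The helper name
# is hypothetical, never called by HART OS code, and assumes a clean
# environment with no HART_* overrides set.
def _demo_mode_detection() -> None:
    global _os_mode_cached
    os.environ['HART_OS_MODE'] = 'true'
    _os_mode_cached = None                # drop the memoized verdict
    assert is_os_mode() is True
    assert get_port('backend') == 677     # privileged OS-mode port
    del os.environ['HART_OS_MODE']
    _os_mode_cached = None                # re-detect for real callers
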

# ── Port Resolution ───────────────────────────────────────────

def get_port(service: str, override: Optional[int] = None) -> int:
    """Get the port for a HART OS service.

    Resolution order:
    1. Explicit override parameter
    2. Environment variable (HARTOS_BACKEND_PORT, etc.)
    3. OS-mode port (if running as HART OS)
    4. App-mode port (default)

    Args:
        service: Service name ('backend', 'discovery', 'vision', etc.)
        override: Explicit port override (highest priority).

    Returns:
        Port number.
    """
    # 1. Explicit override
    if override is not None:
        return override

    # 2. Environment variable
    env_var = ENV_OVERRIDES.get(service)
    if env_var:
        env_val = os.environ.get(env_var)
        if env_val:
            try:
                return int(env_val)
            except ValueError:
                logger.warning(f"Invalid port in {env_var}={env_val}, using default")

    # 3. OS mode vs App mode
    if is_os_mode():
        return OS_PORTS.get(service, APP_PORTS.get(service, 0))

    return APP_PORTS.get(service, 0)
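

# Illustrative resolution walk (assumes app mode and a clean environment;
# the variable name comes from ENV_OVERRIDES above):
#
#     get_port('llm')            # → 8080, APP_PORTS default
#     os.environ['HART_LLM_PORT'] = '9001'
#     get_port('llm')            # → 9001, env var beats mode defaults
#     get_port('llm', 9999)      # → 9999, explicit override beats everything
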

def get_all_ports() -> dict:
    """Get all service ports as a dict."""
    return {service: get_port(service) for service in APP_PORTS}


def check_port_available(port: int, host: str = '0.0.0.0') -> bool:
    """Check if a port is available for binding.

    Note: SO_REUSEADDR is not set, so a port lingering in TIME_WAIT
    reports as unavailable; that conservative answer suits callers
    checking before they spawn a service.

    Args:
        port: Port number to check.
        host: Host to check on.

    Returns:
        True if port is available.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.settimeout(1)
        s.bind((host, port))
        return True
    except OSError:
        return False
    finally:
        s.close()
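

# Illustrative pre-flight (an assumed call site, not code from this module):
#
#     port = get_port('websocket')
#     if not check_port_available(port):
#         logger.warning("port %d busy; is the service already up?", port)
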

def get_mode_label() -> str:
    """Return 'OS' or 'APP' for display."""
    return 'OS' if is_os_mode() else 'APP'


# ── LLM URL Resolution ──────────────────────────────────────

_llm_url_cache: str = ''
_llm_url_cache_ts: float = 0.0
_LLM_URL_CACHE_TTL: float = 30.0  # seconds — re-resolve after this
_LLM_PROBE_TIMEOUT: float = 1.0  # seconds per candidate probe
_LLM_PROBE_NEG_TTL: float = 10.0  # seconds — probe verdicts (live and dead) cached this long
_llm_probe_cache: dict = {}  # url → (is_healthy, ts)
_llm_url_last_announced: str = ''  # for change-toast emission


def _probe_llm_endpoint(url: str) -> bool:
    """Cheap TCP-connect probe with short-lived result caching.

    True if something is listening on the URL's host:port. No HTTP,
    no body — just confirms the port is open. Both verdicts are cached
    for ``_LLM_PROBE_NEG_TTL`` seconds so repeated resolver calls don't
    re-probe dead candidates and burn 1s each time.

    Used by ``get_local_llm_url`` to walk candidate URLs and pick the
    first reachable one instead of returning the first non-empty config
    field and discovering it's dead at synth time. Exception path
    returns False — we never raise from a probe.
    """
    cached = _llm_probe_cache.get(url)
    if cached is not None:
        ok, ts = cached
        if (time.time() - ts) < _LLM_PROBE_NEG_TTL:
            return ok
    try:
        body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = body.partition(':')
        if port_s:
            port = int(port_s)
        else:
            # No explicit port: fall back to the scheme default so
            # remote URLs like https://host/v1 don't probe as dead.
            port = 443 if url.startswith('https://') else 80
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(_LLM_PROBE_TIMEOUT)
        try:
            ok = s.connect_ex((host or '127.0.0.1', port)) == 0
        finally:
            s.close()
    except Exception:
        ok = False
    _llm_probe_cache[url] = (ok, time.time())
    return ok
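

# Illustrative cost profile (assumes nothing is listening on this port,
# which is arbitrary and chosen only for the sketch):
#
#     _probe_llm_endpoint('http://127.0.0.1:65500/v1')  # up to ~1s: real probe
#     _probe_llm_endpoint('http://127.0.0.1:65500/v1')  # ~0s: cached verdict
#
# After _LLM_PROBE_NEG_TTL (10s) the next call probes again.
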

def _is_loopback_url(url: str) -> bool:
    """True iff the URL points at this machine. Used to gate auto-
    correct of stale config fields — never rewrite a non-loopback URL
    (could be a real remote endpoint the user explicitly chose)."""
    try:
        body = url.split('://', 1)[-1].split('/', 1)[0]
        if body.startswith('['):
            # Bracketed IPv6 literal, e.g. [::1]:8080; keep the brackets
            # so the membership test below can match '[::1]'.
            host = body.split(']', 1)[0] + ']'
        else:
            host = body.partition(':')[0]
        return host.lower() in ('127.0.0.1', 'localhost', '0.0.0.0', '[::1]', '')
    except Exception:
        return False


def _autocorrect_stale_loopback_config(healthy_url: str) -> None:
    """Rewrite drifted loopback URLs in ``~/.nunba/llama_config.json``
    so non-resolver readers (chat path's direct read of
    ``external_llm_endpoint.base_url``) also see the live URL.

    Safety rails:
    * Only touches fields whose CURRENT value is loopback — a real
      remote endpoint the user typed (e.g. a cloud OpenAI-compat URL)
      is preserved verbatim. The toast still fires; the user fixes
      externally.
    * Only writes if at least one field actually changed — no
      timestamp churn on the config file otherwise.
    * Never raises.

    This is the auto-correct the user asked for: when the resolver
    detects drift between two source-of-truth fields, the stale one
    gets healed in place so the next boot reads cleanly without an
    operator edit.
    """
    if not _is_loopback_url(healthy_url):
        return
    try:
        import json as _json
        cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if not os.path.isfile(cfg_path):
            return
        with open(cfg_path) as _f:
            cfg = _json.load(_f)
        changed = False
        # external_llm_endpoint — only auto-fix if its current value is
        # loopback (i.e. it was *intended* to point at this machine).
        ext = cfg.get('external_llm_endpoint') or {}
        ext_base = ext.get('base_url') or ''
        if ext_base and _is_loopback_url(ext_base) and ext_base != healthy_url:
            ext['base_url'] = healthy_url
            ext['completions'] = (
                healthy_url.rstrip('/').removesuffix('/v1')
                + '/v1/chat/completions'
            )
            cfg['external_llm_endpoint'] = ext
            changed = True
        # custom_api_base — same shape, host-portion of healthy URL.
        cab = cfg.get('custom_api_base') or ''
        healthy_host = healthy_url.rstrip('/').removesuffix('/v1')
        if cab and _is_loopback_url(cab) and cab != healthy_host:
            cfg['custom_api_base'] = healthy_host
            changed = True
        if changed:
            with open(cfg_path, 'w') as _f:
                _json.dump(cfg, _f, indent=2)
            logger.info(
                "[LLM URL] Auto-corrected stale loopback config to %s",
                healthy_url)
    except Exception as e:
        logger.debug("[LLM URL] auto-correct skipped: %s", e)
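

# Illustrative llama_config.json shape the auto-correct walks (field names as
# read above; the values here are made up):
#
#     {
#       "server_port": 8082,
#       "custom_api_base": "http://127.0.0.1:8082",
#       "external_llm_endpoint": {
#         "base_url": "http://127.0.0.1:8080/v1",
#         "completions": "http://127.0.0.1:8080/v1/chat/completions"
#       }
#     }
#
# With healthy_url='http://127.0.0.1:8082/v1', only external_llm_endpoint
# changes: its base_url is loopback and differs, while custom_api_base
# already matches the healthy host.
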

def _emit_llm_url_change_toast(new_url: str) -> None:
    """Best-effort WAMP toast when the resolved LLM URL changes.

    Fires once per actual transition; subsequent resolver calls that
    return the same URL are silent. Surfaces drift (config stale,
    llama-server moved port, external endpoint went down → fell back to
    bundled, etc.) to the user without log-spam. Never raises.
    """
    global _llm_url_last_announced
    if new_url == _llm_url_last_announced:
        return
    _llm_url_last_announced = new_url
    try:
        from core.realtime import publish_async as _wamp_pub
        _wamp_pub(
            'com.hertzai.hevolve.llm.endpoint_changed',
            {'url': new_url, 'reason': 'resolver fall-through'},
            timeout=0.3,
        )
    except Exception:
        pass


def get_local_draft_url() -> str:
    """Single source of truth for the local DRAFT LLM endpoint URL.

    The draft model is the Qwen3.5-0.8B instance that answers
    dispatch_draft_first calls and generates continuous video captions.

    On ≥8GB VRAM, draft runs on a SEPARATE port (8081) from the main
    model (8080) so both stay resident simultaneously.

    On ≤6GB VRAM (no separate draft server), the draft URL points to
    the MAIN model's port so the same model serves both roles — draft
    classification AND agentic responses. This avoids the "draft offline
    → fall through → slow main" latency penalty by letting the speculative
    dispatcher talk to whatever model IS running.

    Resolution order:
    1. HEVOLVE_DRAFT_LLM_URL — full URL override
    2. HEVOLVE_VLM_CAPTION_PORT — port override (separate draft server)
    3. If draft server is running on default port → use it
    4. Otherwise → fall back to main LLM URL (same model, dual role)

    Returns full URL with /v1 suffix (OpenAI-compatible).
    """
    url = os.environ.get('HEVOLVE_DRAFT_LLM_URL', '').strip()
    if not url:
        port = os.environ.get('HEVOLVE_VLM_CAPTION_PORT', '').strip()
        if not port:
            port = str(get_port('vlm_caption'))
        url = f'http://127.0.0.1:{port}/v1'

    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    # If draft port has no server, use the main LLM instead (single-model mode).
    # This makes the main model serve BOTH draft and agentic roles on low VRAM.
    try:
        _body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = _body.partition(':')
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(0.3)
        result = s.connect_ex((host or '127.0.0.1', int(port_s or 8081)))
        s.close()
        if result != 0:
            # Draft port not listening → use main model as draft
            return get_local_llm_url()
    except Exception:
        pass

    return url
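

# Illustrative outcomes, assuming no HEVOLVE_DRAFT_* overrides are set:
#
#     # Draft instance listening on 8081 (the vlm_caption default):
#     get_local_draft_url()   # → 'http://127.0.0.1:8081/v1'
#
#     # Nothing on 8081, main model on 8080: single-model fallback
#     get_local_draft_url()   # → get_local_llm_url(), e.g. '...:8080/v1'
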


def get_local_llm_url() -> str:
    """Single source of truth for the local LLM endpoint URL.

    Resolution order — every non-empty candidate is PROBED, and the
    first reachable one wins. Stale-but-configured URLs (e.g. wizard
    wrote 8080, llama-server later moved to 8082) no longer cause silent
    chat hangs. Probe results are cached for a short TTL so the walk
    is cheap on the chat hot path.

    Candidate order:
    1. HEVOLVE_LOCAL_LLM_URL — canonical env override
    2. CUSTOM_LLM_BASE_URL — user-provided custom endpoint
    3. LLAMA_CPP_PORT — deprecated port-only env var
    4. ~/.nunba/llama_config.json: external_llm_endpoint.base_url
       — wizard-recorded "external" (often actually a loopback,
       see drift bug 2026-04-29)
    5. ~/.nunba/llama_config.json: server_port
       — Nunba's auto-managed bundled server
    6. ~/.nunba/llama_config.json: custom_api_base
       — wizard mirror of server_port
    7. port_registry default — get_port('llm')

    On a successful resolve that DIFFERS from the previously-announced
    URL, the resolver:
    - Emits a WAMP toast so the user knows their LLM endpoint moved
    - Auto-corrects stale loopback fields in llama_config.json so
      non-resolver readers (chat path's raw external_llm_endpoint
      consumer) also see the live URL on next read
    - Updates HEVOLVE_LOCAL_LLM_URL env so other resolver-based
      callers in the same process see the fresh value immediately

    Cold-boot fallback: if no candidate is reachable (typical during
    the first ~30s of boot before llama-server has finished spawning),
    returns the highest-priority candidate URL anyway as a stable
    placeholder. Callers handle the "configured but not yet listening"
    case via their existing connection-error paths; the placeholder is
    correct *and* the call site doesn't need to special-case None.

    Returns:
        Full URL string, e.g. 'http://127.0.0.1:8082/v1'
    """
    global _llm_url_cache, _llm_url_cache_ts

    now = time.time()
    if _llm_url_cache and (now - _llm_url_cache_ts) < _LLM_URL_CACHE_TTL:
        return _llm_url_cache

    # Build the ordered candidate list — same sources as before plus
    # the wizard-recorded external_llm_endpoint.base_url (the field that
    # caused the 2026-04-29 drift incident). Empty strings are filtered
    # at the probe step.
    candidates: list = []

    candidates.append(os.environ.get('HEVOLVE_LOCAL_LLM_URL', ''))
    candidates.append(os.environ.get('CUSTOM_LLM_BASE_URL', ''))

    _port_env = os.environ.get('LLAMA_CPP_PORT', '')
    if _port_env:
        candidates.append(f'http://127.0.0.1:{_port_env}/v1')

    try:
        import json as _json
        _cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if os.path.isfile(_cfg_path):
            with open(_cfg_path) as _f:
                _cfg = _json.load(_f)
            _ext = (_cfg.get('external_llm_endpoint') or {}).get('base_url') or ''
            if _ext:
                candidates.append(_ext)
            _port = _cfg.get('server_port')
            if _port:
                candidates.append(f'http://127.0.0.1:{_port}/v1')
            _cab = _cfg.get('custom_api_base') or ''
            if _cab:
                candidates.append(_cab)
    except Exception:
        pass

    candidates.append(f'http://127.0.0.1:{get_port("llm")}/v1')

    # Normalize, dedupe (preserving order), filter invalids.
    seen: set = set()
    normalized: list = []
    for raw in candidates:
        if not raw:
            continue
        u = raw.rstrip('/')
        if not u.endswith('/v1'):
            u += '/v1'
        if not _validate_llm_url(u):
            continue
        if u in seen:
            continue
        seen.add(u)
        normalized.append(u)

    if not normalized:
        # Should be unreachable — the port_registry default is always
        # present — but stay defensive.
        normalized = [f'http://127.0.0.1:{get_port("llm")}/v1']

    # First reachable candidate wins. If none are reachable (cold boot
    # before llama-server spawns), return the highest-priority candidate
    # anyway as a stable placeholder.
    chosen = None
    for url in normalized:
        if _probe_llm_endpoint(url):
            chosen = url
            break
    if chosen is None:
        chosen = normalized[0]

    _llm_url_cache = chosen
    _llm_url_cache_ts = now

    # On a real transition (different URL than previously announced),
    # heal the drift so non-resolver readers also pick up the live URL,
    # surface the change to the user, and update env so any caller in
    # this process still on `os.environ.get('HEVOLVE_LOCAL_LLM_URL')`
    # sees the same answer.
    if chosen != _llm_url_last_announced:
        _emit_llm_url_change_toast(chosen)
        _autocorrect_stale_loopback_config(chosen)
        try:
            os.environ['HEVOLVE_LOCAL_LLM_URL'] = chosen
        except Exception:
            pass

    return chosen
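

# Illustrative walk (assumes the wizard-set port went stale and the bundled
# server is live on 8082; this mirrors the drift scenario the docstring
# describes — ports are examples only):
#
#     os.environ['HEVOLVE_LOCAL_LLM_URL'] = 'http://127.0.0.1:8080/v1'  # dead
#     get_local_llm_url()   # probes 8080 (dead), walks to the live
#                           # llama_config.json candidate, returns
#                           # 'http://127.0.0.1:8082/v1', fires the toast,
#                           # heals the config, and rewrites the env var
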

def set_local_llm_url(url: str) -> None:
    """Set the local LLM URL and propagate to env.

    Called by Nunba when:
    - start_server() detects/starts a server on a port
    - Port conflict causes reassignment to a new port
    - User provides a custom endpoint via the wizard

    Validates the URL, sets HEVOLVE_LOCAL_LLM_URL, and invalidates cache.
    """
    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    if not _validate_llm_url(url):
        logger.error(f"Refusing to set invalid LLM URL: {url}")
        return

    os.environ['HEVOLVE_LOCAL_LLM_URL'] = url
    invalidate_llm_url()
    logger.info(f"LLM URL set: {url}")

547 

548def invalidate_llm_url() -> None: 

549 """Clear the cached LLM URL. Call after port changes.""" 

550 global _llm_url_cache 

551 _llm_url_cache = '' 

552 

553 

554def is_local_llm() -> bool: 

555 """Check if the configured LLM is a local endpoint (zero cost). 

556 

557 Returns True if the resolved URL points to localhost/127.0.0.1, 

558 or if a local LLM model name is configured. 

559 """ 

560 if os.environ.get('HEVOLVE_LOCAL_LLM_MODEL'): 

561 return True 

562 url = get_local_llm_url() 

563 return any(h in url for h in ('localhost', '127.0.0.1', '0.0.0.0', '[::1]')) 

564 

565 

566def _validate_llm_url(url: str) -> bool: 

567 """Validate that a URL is well-formed for an LLM endpoint. 

568 

569 Checks: has scheme (http/https), has host, port is numeric if present. 

570 Does NOT check connectivity — that's a runtime concern. 

571 """ 

572 if not url: 

573 return False 

574 if not url.startswith(('http://', 'https://')): 

575 return False 

576 # Extract host:port portion 

577 try: 

578 after_scheme = url.split('://', 1)[1] 

579 host_port = after_scheme.split('/')[0] 

580 if ':' in host_port: 

581 host, port_str = host_port.rsplit(':', 1) 

582 if not host or not port_str.isdigit(): 

583 return False 

584 port = int(port_str) 

585 if port < 1 or port > 65535: 

586 return False 

587 elif not host_port: 

588 return False 

589 except (IndexError, ValueError): 

590 return False 

591 return True
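

# Illustrative accept/reject cases (hosts here are examples):
#
#     _validate_llm_url('http://127.0.0.1:8080/v1')    # True
#     _validate_llm_url('https://api.example.com/v1')  # True  (port optional)
#     _validate_llm_url('127.0.0.1:8080')              # False (no scheme)
#     _validate_llm_url('http://127.0.0.1:99999/v1')   # False (port > 65535)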