Coverage for core/port_registry.py: 80.9%
230 statements
1"""
2Port Registry — Single source of truth for HART OS service ports.
4Two modes:
5 APP MODE (default): User-space ports (6777+) for running alongside other apps
6 OS MODE (NixOS): Privileged ports (<1024) for running as the operating system
8OS mode is activated when:
9 - HART_OS_MODE=true environment variable is set, OR
10 - /etc/os-release contains ID=hart-os (NixOS deployment)
12Privileged ports (<1024) require root/systemd, which is correct for OS daemons.
13This frees ports 1024-65535 for user applications.
15Usage:
16 from core.port_registry import get_port, get_all_ports, is_os_mode
18 port = get_port('backend') # 677 (OS mode) or 6777 (app mode)
19 port = get_port('backend', 9999) # Override with specific port
20"""
22import logging
23import os
24import socket
26logger = logging.getLogger('hevolve.ports')

# ── Port Definitions ──────────────────────────────────────────

# App mode: user-space ports (coexist with other software)
APP_PORTS = {
    'backend': 6777,
    'discovery': 6780,
    'vision': 9891,
    'llm': 8080,
    'websocket': 5460,
    'diarization': 8004,
    'stt_stream': 8005,
    'dlna_stream': 8554,
    'mesh_wg': 6795,
    'mesh_relay': 6796,
    'model_bus': 6790,
    'mcp': 6791,
    'vlm_caption': 8081,
    # Nunba Flask (the user-facing app server) + langchain GPT-API
    # sidecar. Previously hardcoded in core/health_probe.py — moved
    # here per #460 so the probes (and any future caller) walk the
    # canonical resolver instead of duplicating the literal port.
    'flask': 5000,
    'langchain': 6778,
}

# OS mode: privileged ports (HART OS is the operating system)
OS_PORTS = {
    'backend': 677,
    'discovery': 678,
    'vision': 989,
    'llm': 808,
    'websocket': 546,
    'diarization': 800,
    'stt_stream': 801,
    'dlna_stream': 855,
    'mesh_wg': 679,
    'mesh_relay': 680,
    'model_bus': 681,
    'mcp': 682,
    'vlm_caption': 809,
    'flask': 500,
    # 778 (not 677) to avoid colliding with backend=677 in OS mode.
    'langchain': 778,
}

# Environment variable overrides (take precedence over both modes)
ENV_OVERRIDES = {
    'backend': 'HARTOS_BACKEND_PORT',
    'discovery': 'HART_DISCOVERY_PORT',
    'vision': 'HART_VISION_PORT',
    'llm': 'HART_LLM_PORT',
    'websocket': 'HART_WS_PORT',
    'diarization': 'HEVOLVE_DIARIZATION_PORT',
    'stt_stream': 'HART_STT_STREAM_PORT',
    'dlna_stream': 'HART_DLNA_PORT',
    'mesh_wg': 'HART_MESH_WG_PORT',
    'mesh_relay': 'HART_MESH_RELAY_PORT',
    'model_bus': 'HART_MODEL_BUS_PORT',
    'mcp': 'HART_MCP_PORT',
    'vlm_caption': 'HEVOLVE_VLM_CAPTION_PORT',
    'flask': 'HART_FLASK_PORT',
    'langchain': 'HART_LANGCHAIN_PORT',
}


# ── Detection ─────────────────────────────────────────────────

_os_mode_cached = None


def is_os_mode() -> bool:
    """Detect if running as HART OS (the operating system).

    True when:
    - HART_OS_MODE=true env var, OR
    - /etc/os-release contains ID=hart-os (NixOS deployment)
    """
    global _os_mode_cached
    if _os_mode_cached is not None:
        return _os_mode_cached

    # Explicit env var
    if os.environ.get('HART_OS_MODE', '').lower() in ('true', '1', 'yes'):
        _os_mode_cached = True
        return True

    # NixOS detection: check /etc/os-release
    try:
        with open('/etc/os-release', 'r') as f:
            for line in f:
                if line.strip().startswith('ID=') and 'hart-os' in line:
                    _os_mode_cached = True
                    return True
    except (FileNotFoundError, PermissionError):
        pass

    _os_mode_cached = False
    return False
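
# Illustrative sketch (comment only, not executed): the two triggers that
# is_os_mode() actually checks. Exporting the env var before starting any
# HART service flips every get_port() call onto the OS_PORTS table:
#
#   export HART_OS_MODE=true
#   # or ship an /etc/os-release that contains a line such as:  ID=hart-os
#
# The verdict is cached in _os_mode_cached for the lifetime of the process.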


# ── Port Resolution ───────────────────────────────────────────

def get_port(service: str, override: int = None) -> int:
    """Get the port for a HART OS service.

    Resolution order:
    1. Explicit override parameter
    2. Environment variable (HARTOS_BACKEND_PORT, etc.)
    3. OS-mode port (if running as HART OS)
    4. App-mode port (default)

    Args:
        service: Service name ('backend', 'discovery', 'vision', etc.)
        override: Explicit port override (highest priority).

    Returns:
        Port number.
    """
    # 1. Explicit override
    if override is not None:
        return override

    # 2. Environment variable
    env_var = ENV_OVERRIDES.get(service)
    if env_var:
        env_val = os.environ.get(env_var)
        if env_val:
            try:
                return int(env_val)
            except ValueError:
                logger.warning(f"Invalid port in {env_var}={env_val}, using default")

    # 3. OS mode vs App mode
    if is_os_mode():
        return OS_PORTS.get(service, APP_PORTS.get(service, 0))

    return APP_PORTS.get(service, 0)
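
# Illustrative resolution walk (comment only; ports come from the tables above
# and the env-var name from ENV_OVERRIDES, nothing here is new behaviour):
#
#   get_port('backend')            # app mode -> 6777, OS mode -> 677
#   get_port('backend', 9999)      # explicit override always wins -> 9999
#   # with HARTOS_BACKEND_PORT=7000 exported:
#   get_port('backend')            # env override beats both tables -> 7000
#   get_port('unknown_service')    # in neither table -> 0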


def get_all_ports() -> dict:
    """Get all service ports as a dict."""
    return {service: get_port(service) for service in APP_PORTS}


def check_port_available(port: int, host: str = '0.0.0.0') -> bool:
    """Check if a port is available for binding.

    Args:
        port: Port number to check.
        host: Host to check on.

    Returns:
        True if port is available.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.settimeout(1)
        s.bind((host, port))
        return True
    except OSError:
        return False
    finally:
        s.close()
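
# Illustrative caller (hypothetical, not part of this module): resolve the
# canonical port first, then warn if something else already holds it.
#
#   port = get_port('backend')
#   if not check_port_available(port):
#       logger.warning("backend port %s already in use", port)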


def get_mode_label() -> str:
    """Return 'OS' or 'APP' for display."""
    return 'OS' if is_os_mode() else 'APP'


# ── LLM URL Resolution ──────────────────────────────────────

_llm_url_cache: str = ''
_llm_url_cache_ts: float = 0.0
_LLM_URL_CACHE_TTL: float = 30.0   # seconds — re-resolve after this
_LLM_PROBE_TIMEOUT: float = 1.0    # seconds per candidate probe
_LLM_PROBE_NEG_TTL: float = 10.0   # seconds — cache "dead" verdicts
_llm_probe_cache: dict = {}        # url → (is_healthy, ts)
_llm_url_last_announced: str = ''  # for change-toast emission


def _probe_llm_endpoint(url: str) -> bool:
    """Cheap TCP-connect probe with short-lived result caching.

    True if something is listening on the URL's host:port. No HTTP,
    no body — just confirms the port is open. Result is cached for
    ``_LLM_PROBE_NEG_TTL`` seconds so repeated resolver calls don't
    re-probe dead candidates and burn 1s each time.

    Used by ``get_local_llm_url`` to walk candidate URLs and pick the
    first reachable one instead of returning the first non-empty config
    field and discovering it's dead at synth time. Exception path
    returns False — we never raise from a probe.
    """
    import time
    cached = _llm_probe_cache.get(url)
    if cached is not None:
        ok, ts = cached
        if (time.time() - ts) < _LLM_PROBE_NEG_TTL:
            return ok
    try:
        import socket
        body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = body.partition(':')
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(_LLM_PROBE_TIMEOUT)
        try:
            ok = s.connect_ex(
                (host or '127.0.0.1', int(port_s or 0))) == 0
        finally:
            s.close()
    except Exception:
        ok = False
    _llm_probe_cache[url] = (ok, time.time())
    return ok


def _is_loopback_url(url: str) -> bool:
    """True iff the URL points at this machine. Used to gate auto-
    correct of stale config fields — never rewrite a non-loopback URL
    (could be a real remote endpoint the user explicitly chose)."""
    try:
        body = url.split('://', 1)[-1].split('/', 1)[0]
        host = body.partition(':')[0].lower()
        return host in ('127.0.0.1', 'localhost', '0.0.0.0', '[::1]', '')
    except Exception:
        return False
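
# Illustrative only: hosts the tuple above actually matches.
#
#   _is_loopback_url('http://127.0.0.1:8082/v1')    # True
#   _is_loopback_url('http://localhost:8080/v1')    # True
#   _is_loopback_url('https://api.example.com/v1')  # False, never rewritten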


def _autocorrect_stale_loopback_config(healthy_url: str) -> None:
    """Rewrite drifted loopback URLs in ``~/.nunba/llama_config.json``
    so non-resolver readers (chat path's direct read of
    ``external_llm_endpoint.base_url``) also see the live URL.

    Safety rails:
    * Only touches fields whose CURRENT value is loopback — a real
      remote endpoint the user typed (e.g. a cloud OpenAI-compat URL)
      is preserved verbatim. The toast still fires; the user fixes
      externally.
    * Only writes if at least one field actually changed — no
      timestamp churn on the config file otherwise.
    * Never raises.

    This is the auto-correct the user asked for: when the resolver
    detects drift between two source-of-truth fields, the stale one
    gets healed in place so the next boot reads cleanly without an
    operator edit.
    """
    if not _is_loopback_url(healthy_url):
        return
    try:
        import json as _json
        cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if not os.path.isfile(cfg_path):
            return
        with open(cfg_path) as _f:
            cfg = _json.load(_f)
        changed = False
        # external_llm_endpoint — only auto-fix if its current value is
        # loopback (i.e. it was *intended* to point at this machine).
        ext = cfg.get('external_llm_endpoint') or {}
        ext_base = ext.get('base_url') or ''
        if ext_base and _is_loopback_url(ext_base) and ext_base != healthy_url:
            ext['base_url'] = healthy_url
            ext['completions'] = (
                healthy_url.rstrip('/').removesuffix('/v1')
                + '/v1/chat/completions'
            )
            cfg['external_llm_endpoint'] = ext
            changed = True
        # custom_api_base — same shape, host-portion of healthy URL.
        cab = cfg.get('custom_api_base') or ''
        healthy_host = healthy_url.rstrip('/').removesuffix('/v1')
        if cab and _is_loopback_url(cab) and cab != healthy_host:
            cfg['custom_api_base'] = healthy_host
            changed = True
        if changed:
            with open(cfg_path, 'w') as _f:
                _json.dump(cfg, _f, indent=2)
            logger.info(
                "[LLM URL] Auto-corrected stale loopback config to %s",
                healthy_url)
    except Exception as e:
        logger.debug("[LLM URL] auto-correct skipped: %s", e)
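
# Illustrative before/after for the heal above (hypothetical values), assuming
# the resolver settled on http://127.0.0.1:8082/v1; only the loopback fields
# named in the function are rewritten:
#
#   before: "base_url": "http://127.0.0.1:8080/v1",
#           "completions": "http://127.0.0.1:8080/v1/chat/completions",
#           "custom_api_base": "http://127.0.0.1:8080"
#   after:  "base_url": "http://127.0.0.1:8082/v1",
#           "completions": "http://127.0.0.1:8082/v1/chat/completions",
#           "custom_api_base": "http://127.0.0.1:8082"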


def _emit_llm_url_change_toast(new_url: str) -> None:
    """Best-effort WAMP toast when the resolved LLM URL changes.

    Fires once per actual transition; subsequent resolver calls that
    return the same URL are silent. Surfaces drift (config stale,
    llama-server moved port, external endpoint went down → fell back to
    bundled, etc.) to the user without log-spam. Never raises.
    """
    global _llm_url_last_announced
    if new_url == _llm_url_last_announced:
        return
    _llm_url_last_announced = new_url
    try:
        from core.realtime import publish_async as _wamp_pub
        _wamp_pub(
            'com.hertzai.hevolve.llm.endpoint_changed',
            {'url': new_url, 'reason': 'resolver fall-through'},
            timeout=0.3,
        )
    except Exception:
        pass


def get_local_draft_url() -> str:
    """Single source of truth for the local DRAFT LLM endpoint URL.

    The draft model is the Qwen3.5-0.8B instance that answers
    dispatch_draft_first calls and generates continuous video captions.

    On ≥8GB VRAM, draft runs on a SEPARATE port (8081) from the main
    model (8080) so both stay resident simultaneously.

    On ≤6GB VRAM (no separate draft server), the draft URL points to
    the MAIN model's port so the same model serves both roles — draft
    classification AND agentic responses. This avoids the "draft offline
    → fall through → slow main" latency penalty by letting the speculative
    dispatcher talk to whatever model IS running.

    Resolution order:
    1. HEVOLVE_DRAFT_LLM_URL — full URL override
    2. HEVOLVE_VLM_CAPTION_PORT — port override (separate draft server)
    3. If draft server is running on default port → use it
    4. Otherwise → fall back to main LLM URL (same model, dual role)

    Returns full URL with /v1 suffix (OpenAI-compatible).
    """
    url = os.environ.get('HEVOLVE_DRAFT_LLM_URL', '').strip()
    if not url:
        port = os.environ.get('HEVOLVE_VLM_CAPTION_PORT', '').strip()
        if not port:
            port = str(get_port('vlm_caption'))
        url = f'http://127.0.0.1:{port}/v1'

    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    # If draft port has no server, use the main LLM instead (single-model mode).
    # This makes the main model serve BOTH draft and agentic roles on low VRAM.
    try:
        import socket
        _body = url.split('://', 1)[-1].split('/', 1)[0]
        host, _, port_s = _body.partition(':')
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(0.3)
        result = s.connect_ex((host or '127.0.0.1', int(port_s or 8081)))
        s.close()
        if result != 0:
            # Draft port not listening → use main model as draft
            return get_local_llm_url()
    except Exception:
        pass

    return url
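
# Illustrative outcomes (hypothetical environments; ports come from the
# registry defaults and the fallback logic above):
#
#   HEVOLVE_DRAFT_LLM_URL=http://127.0.0.1:9000  -> 'http://127.0.0.1:9000/v1'
#   nothing set, draft server listening on 8081  -> 'http://127.0.0.1:8081/v1'
#   nothing set, nothing on 8081 (low-VRAM box)  -> get_local_llm_url() result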


def get_local_llm_url() -> str:
    """Single source of truth for the local LLM endpoint URL.

    Resolution order — every non-empty candidate is PROBED, and the
    first reachable one wins. Stale-but-configured URLs (e.g. wizard
    wrote 8080, llama-server later moved to 8082) no longer cause silent
    chat hangs. Probe results are cached for a short TTL so the walk
    is cheap on the chat hot path.

    Candidate order:
    1. HEVOLVE_LOCAL_LLM_URL — canonical env override
    2. CUSTOM_LLM_BASE_URL — user-provided custom endpoint
    3. LLAMA_CPP_PORT — deprecated port-only env var
    4. ~/.nunba/llama_config.json: external_llm_endpoint.base_url
       — wizard-recorded "external" (often actually a loopback,
         see drift bug 2026-04-29)
    5. ~/.nunba/llama_config.json: server_port
       — Nunba's auto-managed bundled server
    6. ~/.nunba/llama_config.json: custom_api_base
       — wizard mirror of server_port
    7. port_registry default — get_port('llm')

    On a successful resolve that DIFFERS from the previously-announced
    URL, the resolver:
    - Emits a WAMP toast so the user knows their LLM endpoint moved
    - Auto-corrects stale loopback fields in llama_config.json so
      non-resolver readers (chat path's raw external_llm_endpoint
      consumer) also see the live URL on next read
    - Updates HEVOLVE_LOCAL_LLM_URL env so other resolver-based
      callers in the same process see the fresh value immediately

    Cold-boot fallback: if no candidate is reachable (typical during
    the first ~30s of boot before llama-server has finished spawning),
    returns the highest-priority candidate URL anyway as a stable
    placeholder. Callers handle the "configured but not yet listening"
    case via their existing connection-error paths; the placeholder is
    correct *and* the call site doesn't need to special-case None.

    Returns:
        Full URL string, e.g. 'http://127.0.0.1:8082/v1'
    """
    import time
    global _llm_url_cache, _llm_url_cache_ts

    now = time.time()
    if _llm_url_cache and (now - _llm_url_cache_ts) < _LLM_URL_CACHE_TTL:
        return _llm_url_cache

    # Build the ordered candidate list — same sources as before plus
    # the wizard-recorded external_llm_endpoint.base_url (the field that
    # caused the 2026-04-29 drift incident). Empty strings are filtered
    # at the probe step.
    candidates: list = []

    candidates.append(os.environ.get('HEVOLVE_LOCAL_LLM_URL', ''))
    candidates.append(os.environ.get('CUSTOM_LLM_BASE_URL', ''))

    _port_env = os.environ.get('LLAMA_CPP_PORT', '')
    if _port_env:
        candidates.append(f'http://127.0.0.1:{_port_env}/v1')

    try:
        import json as _json
        _cfg_path = os.path.join(
            os.path.expanduser('~'), '.nunba', 'llama_config.json')
        if os.path.isfile(_cfg_path):
            with open(_cfg_path) as _f:
                _cfg = _json.load(_f)
            _ext = (_cfg.get('external_llm_endpoint') or {}).get('base_url') or ''
            if _ext:
                candidates.append(_ext)
            _port = _cfg.get('server_port')
            if _port:
                candidates.append(f'http://127.0.0.1:{_port}/v1')
            _cab = _cfg.get('custom_api_base') or ''
            if _cab:
                candidates.append(_cab)
    except Exception:
        pass

    candidates.append(f'http://127.0.0.1:{get_port("llm")}/v1')

    # Normalize, dedupe (preserving order), filter invalids.
    seen: set = set()
    normalized: list = []
    for raw in candidates:
        if not raw:
            continue
        u = raw.rstrip('/')
        if not u.endswith('/v1'):
            u += '/v1'
        if not _validate_llm_url(u):
            continue
        if u in seen:
            continue
        seen.add(u)
        normalized.append(u)

    if not normalized:
        # Should be unreachable — port_registry default is always present —
        # but stay defensive.
        normalized = [f'http://127.0.0.1:{get_port("llm")}/v1']

    # First reachable candidate wins. If none are reachable (cold boot
    # before llama-server spawns), return the highest-priority candidate
    # anyway as a stable placeholder.
    chosen = None
    for url in normalized:
        if _probe_llm_endpoint(url):
            chosen = url
            break
    if chosen is None:
        chosen = normalized[0]

    _llm_url_cache = chosen
    _llm_url_cache_ts = now

    # On a real transition (different URL than previously announced),
    # heal the drift so non-resolver readers also pick up the live URL,
    # surface the change to the user, and update env so any caller in
    # this process that's still on `os.environ.get(HEVOLVE_LOCAL_LLM_URL)`
    # sees the same answer.
    if chosen != _llm_url_last_announced:
        _emit_llm_url_change_toast(chosen)
        _autocorrect_stale_loopback_config(chosen)
        try:
            os.environ['HEVOLVE_LOCAL_LLM_URL'] = chosen
        except Exception:
            pass

    return chosen
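
# Illustrative walk (hypothetical state): HEVOLVE_LOCAL_LLM_URL unset, the
# config file still says 8080, but llama-server actually moved to 8082 and
# LLAMA_CPP_PORT points there:
#
#   normalized -> ['http://127.0.0.1:8082/v1',   # LLAMA_CPP_PORT (reachable)
#                  'http://127.0.0.1:8080/v1']   # stale config / registry default
#   probe picks 8082, the change toast fires, and the stale loopback config
#   fields are healed to 8082.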


def set_local_llm_url(url: str) -> None:
    """Set the local LLM URL and propagate to env.

    Called by Nunba when:
    - start_server() detects/starts a server on a port
    - Port conflict causes reassignment to a new port
    - User provides a custom endpoint via the wizard

    Validates the URL, sets HEVOLVE_LOCAL_LLM_URL, and invalidates cache.
    """
    url = url.rstrip('/')
    if not url.endswith('/v1'):
        url += '/v1'

    if not _validate_llm_url(url):
        logger.error(f"Refusing to set invalid LLM URL: {url}")
        return

    os.environ['HEVOLVE_LOCAL_LLM_URL'] = url
    invalidate_llm_url()
    logger.info(f"LLM URL set: {url}")
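
# Illustrative caller (hypothetical, e.g. after a port-conflict reassignment):
#
#   set_local_llm_url('http://127.0.0.1:8082')   # '/v1' suffix added for you
#   get_local_llm_url()                          # cache was invalidated, so the
#                                                # next resolve can pick this up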


def invalidate_llm_url() -> None:
    """Clear the cached LLM URL. Call after port changes."""
    global _llm_url_cache
    _llm_url_cache = ''


def is_local_llm() -> bool:
    """Check if the configured LLM is a local endpoint (zero cost).

    Returns True if the resolved URL points to localhost/127.0.0.1,
    or if a local LLM model name is configured.
    """
    if os.environ.get('HEVOLVE_LOCAL_LLM_MODEL'):
        return True
    url = get_local_llm_url()
    return any(h in url for h in ('localhost', '127.0.0.1', '0.0.0.0', '[::1]'))


def _validate_llm_url(url: str) -> bool:
    """Validate that a URL is well-formed for an LLM endpoint.

    Checks: has scheme (http/https), has host, port is numeric if present.
    Does NOT check connectivity — that's a runtime concern.
    """
    if not url:
        return False
    if not url.startswith(('http://', 'https://')):
        return False
    # Extract host:port portion
    try:
        after_scheme = url.split('://', 1)[1]
        host_port = after_scheme.split('/')[0]
        if ':' in host_port:
            host, port_str = host_port.rsplit(':', 1)
            if not host or not port_str.isdigit():
                return False
            port = int(port_str)
            if port < 1 or port > 65535:
                return False
        elif not host_port:
            return False
    except (IndexError, ValueError):
        return False
    return True