Coverage for core/health_probe.py: 87.3%

55 statements  

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1"""Canonical runtime-state probes. 

2 

3Single source of truth for "is the daemon actually running?", "is the 

4LLM server actually reachable?", "is Flask up?". Replaces the 

5duplicated, drift-prone probes that previously lived inline in BOTH 

6`integrations/mcp/mcp_server.py` and 

7`integrations/mcp/mcp_http_bridge.py`. 

8 

9Why this module exists (root-cause notes from 2026-05-01 incident): 

10 

111. The old `daemon_enabled` probe read 

12 ``os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', 'false')`` — a 

13 *config snapshot*, not the actual thread state. It returned 

14 ``'false'`` even when the daemon thread was alive, because the 

15 env-var auto-setter at ``integrations/social/__init__.py:348`` 

16 only runs `if env is None`, leaving any other unset/empty value 

17 to default to `'false'`. Probes must read the actual 

18 ``agent_daemon._running`` singleton state. 

19 

202. The old `llm_server` probe hit 

21 ``http://localhost:{get_port('llm')}/health`` (default 8080). On 

22 installs where llama-server binds to a non-default port (set via 

23 ``HEVOLVE_LOCAL_LLM_URL``, ``LLAMA_CPP_PORT``, or written into 

24 ``~/.nunba/llama_config.json:server_port``), this hardcoded URL 

25 misses entirely. The canonical resolver 

26 ``core.port_registry.get_local_llm_url()`` already walks 7 

27 candidate sources and probes each — both MCP probes must route 

28 through it instead of duplicating a worse version of the same 

29 logic. 

30 

31Public API: each `probe_*` function returns a plain dict that the 

32MCP tools serialize to JSON. Side-effect free, fast (≤200 ms total 

33on a healthy host). 

34 

35Per CLAUDE.md DRY gate — no parallel implementations of these probes 

36are allowed elsewhere. If you find yourself writing 

37``os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', ...)`` to figure out 

38"is the daemon on?", you are in the wrong place; call 

39``probe_agent_daemon()`` instead. 

40""" 

41from __future__ import annotations 

42import os 

43from typing import Dict, Any 

44 

45 
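# Typical call-site shape (illustrative sketch; assumes this module imports
# as ``core.health_probe``, adjust to your layout):
#
#     import json
#     from core.health_probe import probe_agent_daemon
#     print(json.dumps(probe_agent_daemon(), indent=2))
#
# Every probe below follows the same contract: no required arguments in,
# one JSON-serializable dict out.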


def probe_agent_daemon() -> Dict[str, Any]:
    """Return the actual agent daemon thread state plus config.

    Reads ``agent_daemon._running`` and ``agent_daemon._thread`` —
    NOT the ``HEVOLVE_AGENT_ENGINE_ENABLED`` env var (which is the
    pre-boot intent, not the live state). Falls back to the env var if
    the daemon module cannot be imported (extreme degraded boot).
    """
    out: Dict[str, Any] = {
        'poll_interval': int(os.environ.get('HEVOLVE_AGENT_POLL_INTERVAL', '30')),
        'max_concurrent': int(os.environ.get('HEVOLVE_AGENT_MAX_CONCURRENT', '10')),
        'speculative_enabled': (
            os.environ.get('HEVOLVE_SPECULATIVE_ENABLED', 'false').lower() == 'true'
        ),
    }
    try:
        from integrations.agent_engine.agent_daemon import agent_daemon
        out['daemon_enabled'] = bool(agent_daemon._running)
        out['daemon_thread_alive'] = bool(
            agent_daemon._thread and agent_daemon._thread.is_alive()
        )
        out['daemon_tick_count'] = int(getattr(agent_daemon, '_tick_count', 0))
    except Exception as e:
        # Degraded fallback — couldn't reach the daemon module at all.
        out['daemon_enabled'] = (
            os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', 'false').lower() == 'true'
        )
        out['daemon_thread_alive'] = False
        out['daemon_probe_error'] = str(e)
    return out
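# Illustrative probe_agent_daemon() result on a healthy host (example
# values, not guarantees):
#
#     {'poll_interval': 30, 'max_concurrent': 10, 'speculative_enabled': False,
#      'daemon_enabled': True, 'daemon_thread_alive': True, 'daemon_tick_count': 142}
#
# On a degraded boot the dict carries 'daemon_probe_error' plus the
# env-var fallback for 'daemon_enabled'.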


def probe_llm(include_models: bool = False) -> Dict[str, Any]:
    """Return live LLM server state via an HTTP-fidelity probe.

    Issues an actual HTTP GET to ``<url>/models`` and checks for a 200
    response. Distinct from the TCP-only ``_probe_llm_endpoint`` in
    ``core.port_registry``, which is the cheap candidate filter for
    ``get_local_llm_url`` — that one stays TCP-only on purpose
    (sub-1ms per candidate). This probe upgrades to HTTP fidelity
    so a half-loaded llama-server (port bound but model not ready)
    is correctly reported as ``down`` (#459).

    SRP (#458): the default response is a single HTTP request — no
    second-call side effect. Pass ``include_models=True`` when you
    actually need the model-list payload; otherwise the response body
    is discarded.

    Always returns the URL we tried so debugging is one log line
    instead of "down" with no clue.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_local_llm_url
        url = get_local_llm_url()
        out['url'] = url
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        # ``get_local_llm_url`` returns the ".../v1" suffix so /models
        # is the OpenAI-compatible models endpoint. A 200 here proves
        # the LLM is actually serving — port-bound-but-stuck processes
        # return 5xx / connection-error / timeout.
        models_url = url.rstrip('/') + '/models'
        resp = pooled_get(models_url, timeout=2)
        if resp.status_code == 200:
            out['status'] = 'up'
            if include_models:
                try:
                    data = resp.json()
                    out['models'] = [
                        m.get('id', 'unknown')
                        for m in data.get('data', [])
                    ]
                except Exception:
                    pass
        else:
            out['status'] = 'down'
            out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out
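# Illustrative probe_llm() results (example values; the URL comes from
# core.port_registry.get_local_llm_url(), so yours may differ):
#
#     {'url': 'http://localhost:8080/v1', 'status': 'up'}
#     {'url': 'http://localhost:8080/v1', 'status': 'down', 'code': 503}
#     {'status': 'probe_error', 'error': '...'}
#
# With include_models=True a healthy response also carries a 'models' list
# of model ids reported by the server.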


def probe_nunba_flask() -> Dict[str, Any]:
    """Return Nunba Flask server state.

    Resolves the port via the canonical
    ``core.port_registry.get_port('flask')`` resolver instead of the
    previously-hardcoded :5000 literal (#460) — env override
    ``HART_FLASK_PORT`` is honored automatically.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_port
        port = get_port('flask')
        out['port'] = port
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        resp = pooled_get(f'http://localhost:{port}/health', timeout=2)
        out['status'] = ('up' if resp.status_code == 200
                         else f'status_{resp.status_code}')
        out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out
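# Illustrative probe_nunba_flask() results (example values; the port comes
# from core.port_registry.get_port('flask')):
#
#     {'port': 5000, 'status': 'up', 'code': 200}
#     {'port': 5000, 'status': 'status_502', 'code': 502}
#     {'port': 5000, 'status': 'down', 'error': '...'}
#
# probe_langchain() below returns the same keys but reports a non-200
# response as 'error' rather than 'status_<code>'.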


def probe_langchain() -> Dict[str, Any]:
    """Return langchain GPT API sidecar state.

    Resolves the port via the canonical
    ``core.port_registry.get_port('langchain')`` resolver instead of the
    previously-hardcoded :6778 literal (#460) — env override
    ``HART_LANGCHAIN_PORT`` is honored automatically.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_port
        port = get_port('langchain')
        out['port'] = port
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        resp = pooled_get(f'http://localhost:{port}/health', timeout=2)
        out['status'] = 'up' if resp.status_code == 200 else 'error'
        out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out


__all__ = [
    'probe_agent_daemon',
    'probe_llm',
    'probe_nunba_flask',
    'probe_langchain',
]
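

if __name__ == '__main__':
    # Minimal manual smoke test (illustrative convenience, not part of the
    # probe contract): dump every probe result as JSON so a quick
    # ``python -m core.health_probe`` shows the whole runtime picture.
    # Assumes this file is importable as ``core.health_probe``.
    import json

    report = {
        'agent_daemon': probe_agent_daemon(),
        'llm': probe_llm(),
        'nunba_flask': probe_nunba_flask(),
        'langchain': probe_langchain(),
    }
    print(json.dumps(report, indent=2))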