Coverage for core/health_probe.py: 87.3%

55 statements  

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1"""Canonical runtime-state probes. 

2 

3Single source of truth for "is the daemon actually running?", "is the 

4LLM server actually reachable?", "is Flask up?". Replaces the 

5duplicated, drift-prone probes that previously lived inline in BOTH 

6`integrations/mcp/mcp_server.py` and 

7`integrations/mcp/mcp_http_bridge.py`. 

8 

9Why this module exists (root-cause notes from 2026-05-01 incident): 

10 

111. The old `daemon_enabled` probe read 

12 ``os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', 'false')`` — a 

13 *config snapshot*, not the actual thread state. It returned 

14 ``'false'`` even when the daemon thread was alive, because the 

15 env-var auto-setter at ``integrations/social/__init__.py:348`` 

16 only runs `if env is None`, leaving any other unset/empty value 

17 to default to `'false'`. Probes must read the actual 

18 ``agent_daemon._running`` singleton state. 

19 

202. The old `llm_server` probe hit 

21 ``http://localhost:{get_port('llm')}/health`` (default 8080). On 

22 installs where llama-server binds to a non-default port (set via 

23 ``HEVOLVE_LOCAL_LLM_URL``, ``LLAMA_CPP_PORT``, or written into 

24 ``~/.nunba/llama_config.json:server_port``), this hardcoded URL 

25 misses entirely. The canonical resolver 

26 ``core.port_registry.get_local_llm_url()`` already walks 7 

27 candidate sources and probes each — both MCP probes must route 

28 through it instead of duplicating a worse version of the same 

29 logic. 

30 

31Public API: each `probe_*` function returns a plain dict that the 

32MCP tools serialize to JSON. Side-effect free, fast (≤200 ms total 

33on a healthy host). 

34 

35Per CLAUDE.md DRY gate — no parallel implementations of these probes 

36are allowed elsewhere. If you find yourself writing 

37``os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', ...)`` to figure out 

38"is the daemon on?", you are in the wrong place; call 

39``probe_agent_daemon()`` instead. 

40""" 

41from __future__ import annotations 

42import os 

43from typing import Dict, Any 

44 

45 
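# Typical call-site shape (illustrative sketch; assumes this module imports
# as ``core.health_probe``, adjust to your layout):
#
#     import json
#     from core.health_probe import probe_agent_daemon
#     print(json.dumps(probe_agent_daemon(), indent=2))
#
# Every probe below follows the same contract: no required arguments in,
# one JSON-serializable dict out.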


def probe_agent_daemon() -> Dict[str, Any]:
    """Return the actual agent daemon thread state plus config.

    Reads ``agent_daemon._running`` and ``agent_daemon._thread`` —
    NOT the ``HEVOLVE_AGENT_ENGINE_ENABLED`` env var (which is the
    pre-boot intent, not the live state). Falls back to the env var if
    the daemon module cannot be imported (extreme degraded boot).
    """
    out: Dict[str, Any] = {
        'poll_interval': int(os.environ.get('HEVOLVE_AGENT_POLL_INTERVAL', '30')),
        'max_concurrent': int(os.environ.get('HEVOLVE_AGENT_MAX_CONCURRENT', '10')),
        'speculative_enabled': (
            os.environ.get('HEVOLVE_SPECULATIVE_ENABLED', 'false').lower() == 'true'
        ),
    }
    try:
        from integrations.agent_engine.agent_daemon import agent_daemon
        out['daemon_enabled'] = bool(agent_daemon._running)
        out['daemon_thread_alive'] = bool(
            agent_daemon._thread and agent_daemon._thread.is_alive()
        )
        out['daemon_tick_count'] = int(getattr(agent_daemon, '_tick_count', 0))
    except Exception as e:
        # Degraded fallback — couldn't reach the daemon module at all.
        out['daemon_enabled'] = (
            os.environ.get('HEVOLVE_AGENT_ENGINE_ENABLED', 'false').lower() == 'true'
        )
        out['daemon_thread_alive'] = False
        out['daemon_probe_error'] = str(e)
    return out
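# Illustrative probe_agent_daemon() result on a healthy host (example
# values, not guarantees):
#
#     {'poll_interval': 30, 'max_concurrent': 10, 'speculative_enabled': False,
#      'daemon_enabled': True, 'daemon_thread_alive': True, 'daemon_tick_count': 142}
#
# On a degraded boot the dict carries 'daemon_probe_error' plus the
# env-var fallback for 'daemon_enabled'.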


def probe_llm(include_models: bool = False) -> Dict[str, Any]:
    """Return live LLM server state via an HTTP-fidelity probe.

    Issues an actual HTTP GET to ``<url>/models`` and checks for a 200
    response. Distinct from the TCP-only ``_probe_llm_endpoint`` in
    ``core.port_registry``, which is the cheap candidate filter for
    ``get_local_llm_url`` — that one stays TCP-only on purpose
    (sub-1ms per candidate). This probe upgrades to HTTP fidelity
    so a half-loaded llama-server (port bound but model not ready)
    is correctly reported as ``down`` (#459).

    SRP (#458): the default response is a single HTTP request — no
    second-call side effect. Pass ``include_models=True`` when you
    actually need the model-list payload; otherwise the response body
    is discarded.

    Always returns the URL we tried so debugging is one log line
    instead of "down" with no clue.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_local_llm_url
        url = get_local_llm_url()
        out['url'] = url
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        # ``get_local_llm_url`` returns the ".../v1" suffix so /models
        # is the OpenAI-compatible models endpoint. A 200 here proves
        # the LLM is actually serving — port-bound-but-stuck processes
        # return 5xx / connection-error / timeout.
        models_url = url.rstrip('/') + '/models'
        resp = pooled_get(models_url, timeout=2)
        if resp.status_code == 200:
            out['status'] = 'up'
            if include_models:
                try:
                    data = resp.json()
                    out['models'] = [
                        m.get('id', 'unknown')
                        for m in data.get('data', [])
                    ]
                except Exception:
                    pass
        else:
            out['status'] = 'down'
            out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out
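# Illustrative probe_llm() results (example values; the URL comes from
# core.port_registry.get_local_llm_url(), so yours may differ):
#
#     {'url': 'http://localhost:8080/v1', 'status': 'up'}
#     {'url': 'http://localhost:8080/v1', 'status': 'down', 'code': 503}
#     {'status': 'probe_error', 'error': '...'}
#
# With include_models=True a healthy response also carries a 'models' list
# of model ids reported by the server.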


def probe_nunba_flask() -> Dict[str, Any]:
    """Return Nunba Flask server state.

    Resolves the port via the canonical
    ``core.port_registry.get_port('flask')`` resolver instead of the
    previously-hardcoded :5000 literal (#460) — env override
    ``HART_FLASK_PORT`` is honored automatically.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_port
        port = get_port('flask')
        out['port'] = port
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        resp = pooled_get(f'http://localhost:{port}/health', timeout=2)
        out['status'] = ('up' if resp.status_code == 200
                         else f'status_{resp.status_code}')
        out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out
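# Illustrative probe_nunba_flask() results (example values; the port comes
# from core.port_registry.get_port('flask')):
#
#     {'port': 5000, 'status': 'up', 'code': 200}
#     {'port': 5000, 'status': 'status_502', 'code': 502}
#     {'port': 5000, 'status': 'down', 'error': '...'}
#
# probe_langchain() below returns the same keys but reports a non-200
# response as 'error' rather than 'status_<code>'.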


def probe_langchain() -> Dict[str, Any]:
    """Return langchain GPT API sidecar state.

    Resolves the port via the canonical
    ``core.port_registry.get_port('langchain')`` resolver instead of the
    previously-hardcoded :6778 literal (#460) — env override
    ``HART_LANGCHAIN_PORT`` is honored automatically.
    """
    out: Dict[str, Any] = {}
    try:
        from core.port_registry import get_port
        port = get_port('langchain')
        out['port'] = port
    except Exception as e:
        out['status'] = 'probe_error'
        out['error'] = str(e)
        return out
    try:
        from core.http_pool import pooled_get
        resp = pooled_get(f'http://localhost:{port}/health', timeout=2)
        out['status'] = 'up' if resp.status_code == 200 else 'error'
        out['code'] = resp.status_code
    except Exception as e:
        out['status'] = 'down'
        out['error'] = str(e)
    return out


__all__ = [
    'probe_agent_daemon',
    'probe_llm',
    'probe_nunba_flask',
    'probe_langchain',
]
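

if __name__ == '__main__':
    # Minimal manual smoke test (illustrative convenience, not part of the
    # probe contract): dump every probe result as JSON so a quick
    # ``python -m core.health_probe`` shows the whole runtime picture.
    # Assumes this file is importable as ``core.health_probe``.
    import json

    report = {
        'agent_daemon': probe_agent_daemon(),
        'llm': probe_llm(),
        'nunba_flask': probe_nunba_flask(),
        'langchain': probe_langchain(),
    }
    print(json.dumps(report, indent=2))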