Coverage for core / verified_llm.py: 0.0% (63 statements)


1"""Verified-signal LLM health check. 

2 

3Why this exists 

4--------------- 

5Shallow-signal health checks lie: 

6- /health returns 200 when llama-server is up but no model is loaded 

7- /v1/models returns the catalog entry even if inference is broken 

8- process-alive is a proxy for "maybe working" — not a guarantee 

9 

10This module issues a real inference — POST /v1/chat/completions with 

11a minimal prompt — and asserts the response contains non-empty text. 

12That is the only "yes it works" signal that matters. 

13 

14Symptom class 

15------------- 

16Mirrors the verified_ready pattern used for TTS (see commit b84437d 

17on Nunba: runtime verification of Indic Parler). Same root cause: 

18success at one layer does not imply capability at the next layer. 

19 

20API 

21--- 

22 is_llm_inference_verified(url, timeout=5.0) -> bool 

23 verify_llm(url, timeout=5.0) -> dict # detail diag 

24 

25Both accept: 

26- url: full http://host:port base; defaults to http://127.0.0.1:8080 

27- timeout: total seconds for the HTTP call 

28 

29The bool API is a drop-in replacement for is_llm_available(). 

30The dict API returns {ok, reason, http_status, content_snippet, 

31elapsed_ms} for deeper triage. 

32""" 

from __future__ import annotations

import json
import logging
import time
import urllib.error
import urllib.request
from typing import Any, Dict

logger = logging.getLogger(__name__)

# Exposed for tests / monkey-patch
DEFAULT_PROBE_PROMPT = "hi"
DEFAULT_BASE_URL = "http://127.0.0.1:8080"
DEFAULT_TIMEOUT = 5.0
DEFAULT_MAX_TOKENS = 4
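# The constants above are the probe's canonical defaults for tests. For a fully
# offline unit test, the HTTP layer itself can be faked (pytest sketch;
# `fake_urlopen` is a hypothetical helper, not defined in this module):
#   monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)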


def _probe_payload(prompt: str, max_tokens: int) -> bytes:
    """Build the JSON body for a minimal /v1/chat/completions probe."""
    body = {
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.0,
        "stream": False,
    }
    return json.dumps(body).encode("utf-8")
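# For reference, the wire format of the probe request: _probe_payload("hi", 4)
# serializes to
#   {"messages": [{"role": "user", "content": "hi"}],
#    "max_tokens": 4, "temperature": 0.0, "stream": false}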


def _extract_content(data: Dict[str, Any]) -> str:
    """Pull text out of an OpenAI-compatible /v1/chat/completions response.

    Handles the 2 common shapes:
    - {'choices': [{'message': {'content': '...'}}]}
    - {'choices': [{'text': '...'}]}   (legacy /v1/completions)

    Returns '' if no content can be found.
    """
    choices = data.get("choices") or []
    if not choices:
        return ""
    first = choices[0] or {}
    msg = first.get("message") or {}
    content = msg.get("content")
    if isinstance(content, str) and content.strip():
        return content
    # legacy /v1/completions style
    text = first.get("text")
    if isinstance(text, str) and text.strip():
        return text
    return ""


def verify_llm(
    url: str = DEFAULT_BASE_URL,
    timeout: float = DEFAULT_TIMEOUT,
    prompt: str = DEFAULT_PROBE_PROMPT,
    max_tokens: int = DEFAULT_MAX_TOKENS,
) -> Dict[str, Any]:
    """Verified-signal health check via real inference.

    Returns dict:
    - ok (bool): True iff the LLM produced non-empty content
    - reason (str): human-readable failure class
    - http_status (int | None): HTTP status code if reachable
    - content_snippet (str): first 64 chars of the reply (on ok=True)
    - elapsed_ms (int): end-to-end time
    """
    endpoint = url.rstrip("/") + "/v1/chat/completions"
    payload = _probe_payload(prompt, max_tokens)
    started = time.monotonic()

    result: Dict[str, Any] = {
        "ok": False,
        "reason": "unknown",
        "http_status": None,
        "content_snippet": "",
        "elapsed_ms": 0,
    }

    try:
        req = urllib.request.Request(
            endpoint,
            data=payload,
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            result["http_status"] = resp.status
            if resp.status != 200:
                result["reason"] = f"http_{resp.status}"
                return result
            raw = resp.read()
    except urllib.error.HTTPError as e:
        result["http_status"] = e.code
        result["reason"] = f"http_{e.code}"
        return result
    except urllib.error.URLError as e:
        result["reason"] = f"unreachable:{e.reason}"
        return result
    except Exception as e:  # timeout, socket, malformed URL
        result["reason"] = f"exception:{type(e).__name__}"
        return result
    finally:
        result["elapsed_ms"] = int((time.monotonic() - started) * 1000)

    # Parse body
    try:
        data = json.loads(raw)
    except Exception:
        result["reason"] = "malformed_json"
        return result

    content = _extract_content(data)
    if not content:
        result["reason"] = "empty_content"
        return result

    result["ok"] = True
    result["reason"] = "verified"
    result["content_snippet"] = content[:64]
    return result
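# Illustrative return values (example figures, not captured output):
#   {"ok": True, "reason": "verified", "http_status": 200,
#    "content_snippet": "Hello! How can I help", "elapsed_ms": 240}
#   {"ok": False, "reason": "unreachable:[Errno 111] Connection refused",
#    "http_status": None, "content_snippet": "", "elapsed_ms": 2}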


def is_llm_inference_verified(
    url: str = DEFAULT_BASE_URL,
    timeout: float = DEFAULT_TIMEOUT,
) -> bool:
    """Boolean drop-in for is_llm_available().

    Returns True iff a real /v1/chat/completions probe produced
    non-empty content within `timeout` seconds.
    """
    try:
        return verify_llm(url=url, timeout=timeout)["ok"]
    except Exception as exc:  # defense-in-depth — never raise
        logger.debug("verify_llm raised unexpectedly: %s", exc)
        return False
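# Call sites can swap it in without renaming (sketch; the original import
# location of is_llm_available is an assumption):
#   from core.verified_llm import is_llm_inference_verified as is_llm_available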


__all__ = [
    "DEFAULT_BASE_URL",
    "DEFAULT_PROBE_PROMPT",
    "DEFAULT_TIMEOUT",
    "DEFAULT_MAX_TOKENS",
    "is_llm_inference_verified",
    "verify_llm",
]
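

# Manual probe sketch, not part of the public API. Running it as
# `python -m core.verified_llm [base_url]` assumes `core` is an importable
# package from the working directory.
if __name__ == "__main__":  # pragma: no cover
    import sys

    base_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_BASE_URL
    print(json.dumps(verify_llm(base_url), indent=2))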