Coverage for integrations/service_tools/system_introspect_tool.py: 0.0%

153 statements

coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1"""System self-introspection tool for agent self-awareness. 

2 

3Exposes the Nunba/HARTOS runtime state (GPU tier, active models, TTS 

4backend, WAMP router state, draft-gate decision rationale) so the LLM 

5can answer "what model is running?", "why is chat slow?", "do I have 

6a GPU?", "is speculation on?" using real live data instead of 

7hallucinating. 

8 

9Every function in this module: 

10 1. Calls the LOCAL Nunba HTTP API (http://127.0.0.1:5000) — the 

11 authoritative source of runtime state. No direct module imports 

12 from Nunba internals, so this works in both in-process (agent 

13 running inside Flask) and cross-process (agent running in its 

14 own Python) deployments. 

15 2. Returns a dict with both structured fields AND a natural-language 

16 summary in `summary` — so the LLM can quote the summary verbatim 

17 for conversational replies, or read structured fields for 

18 follow-up logic. 

19 3. Has a short timeout (3s) — if the local Flask is down, returns 

20 `{available: False, reason: ...}` instead of hanging the agent. 

21 

22Dual registration: 

23 - LangChain: `get_langchain_tools()` wraps each function as a 

24 `langchain.tools.Tool` — importable from Nunba's langchain path. 

25 - AutoGen: `register_autogen(agent, user_proxy)` calls 

26 `autogen.register_function` for each. 

27 

28Extension pattern: 

29 - To add a new introspection endpoint, write a small Python function 

30 here, append it to `_TOOL_FUNCTIONS`. Both loaders pick it up 

31 automatically — no two-sided wiring. 

32""" 


from __future__ import annotations

import logging
import os
from typing import Any, Callable, Dict, List, Optional

import requests

try:
    from core.labeled_tool import labeled_tool
except ImportError:  # cx_Freeze / degraded test env
    def labeled_tool(name, func, description, *, ui_label):  # type: ignore
        try:
            from langchain_core.tools import Tool as _Tool
        except ImportError:
            from langchain.agents import Tool as _Tool
        return _Tool(name=name, func=func, description=description)


# Friendly UI status labels live in the canonical static dict at
# core/constants.py:TOOL_LABELS (#509 — eliminates the prior duplicate
# `_INTROSPECT_LABELS` table here). Both `get_langchain_tools()` and
# `register_autogen()` below look up `TOOL_LABELS.get(fn.__name__, …)`.

logger = logging.getLogger(__name__)

_NUNBA_BASE = os.environ.get(
    'NUNBA_BASE_URL', 'http://127.0.0.1:5000',
).rstrip('/')
_TIMEOUT = 3.0


# ═══════════════════════════════════════════════════════════════════
# Low-level HTTP helper
# ═══════════════════════════════════════════════════════════════════


def _get(path: str) -> Dict[str, Any]:
    """GET a Nunba admin endpoint. Returns `{available: False, ...}` on
    timeout / connection error so callers never hang + never raise into
    the LLM's reasoning loop."""
    url = f"{_NUNBA_BASE}{path}"
    try:
        r = requests.get(url, timeout=_TIMEOUT)
        if r.status_code == 200:
            return r.json()
        return {
            'available': False,
            'status_code': r.status_code,
            'reason': f"GET {path} returned {r.status_code}",
        }
    except requests.exceptions.ConnectionError:
        return {'available': False, 'reason': f"Nunba Flask not reachable at {_NUNBA_BASE}"}
    except requests.exceptions.Timeout:
        return {'available': False, 'reason': f"Nunba {path} timed out after {_TIMEOUT}s"}
    except Exception as e:
        return {'available': False, 'reason': f"GET {path} failed: {e!s}"}
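
# Illustrative call shapes (hypothetical payload values; the real fields
# depend on the Flask route handlers):
#   _get('/backend/health')  -> {'gpu_tier': 'full', 'vram_total_gb': 12.0, ...}
#   Flask down               -> {'available': False, 'reason': 'Nunba Flask not reachable at ...'}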



def _summarize_gpu(h: Dict[str, Any]) -> str:
    """Translate a `/backend/health` payload into one line of English."""
    if h.get('available') is False:
        return "GPU status unavailable (backend not reachable)."
    tier = h.get('gpu_tier', 'unknown')
    name = h.get('gpu_name', 'unknown GPU')
    total = h.get('vram_total_gb', 0) or 0
    free = h.get('vram_free_gb', 0) or 0
    spec = 'on' if h.get('speculation_enabled') else 'off'
    tier_human = {
        'ultra': 'Ultra (≥24GB) — every backend fits concurrently',
        'full': 'Full (≥10GB) — speculative decoding unlocked for all languages',
        'standard': 'Standard (4-10GB) — heavy model only, except cohort fast-path',
        'none': 'No GPU (<4GB or CUDA unavailable) — CPU-only',
    }.get(tier, tier)
    if total > 0:
        return (
            f"{tier_human}. Device: {name}, {total:.1f}GB VRAM, "
            f"{free:.1f}GB free. Speculative decoding: {spec}."
        )
    return f"{tier_human}. Device: {name}. Speculative decoding: {spec}."
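
# Illustrative output (made-up device/values, format as built above):
#   "Full (≥10GB) — speculative decoding unlocked for all languages.
#    Device: RTX 4070, 12.0GB VRAM, 9.3GB free. Speculative decoding: on."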



# ═══════════════════════════════════════════════════════════════════
# Public tool functions (registered to langchain + autogen)
# ═══════════════════════════════════════════════════════════════════

def get_gpu_tier() -> Dict[str, Any]:
    """Report the GPU tier + VRAM + whether speculative decoding is active.

    Use this when the user asks: "do I have a GPU?", "why is chat
    slow?", "is speculative decoding on?", "how much VRAM do I have?",
    "what tier am I?".
    """
    h = _get('/backend/health')
    return {
        'gpu_tier': h.get('gpu_tier'),
        'gpu_name': h.get('gpu_name'),
        'vram_total_gb': h.get('vram_total_gb'),
        'vram_free_gb': h.get('vram_free_gb'),
        'cuda_available': h.get('cuda_available'),
        'speculation_enabled': h.get('speculation_enabled'),
        'available': h.get('available', True),
        'summary': _summarize_gpu(h),
    }
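
# Shape sketch (illustrative values; keys mirror the dict built above):
#   {'gpu_tier': 'standard', 'gpu_name': 'RTX 3050', 'vram_total_gb': 8.0,
#    'vram_free_gb': 5.2, 'cuda_available': True, 'speculation_enabled': False,
#    'available': True, 'summary': 'Standard (4-10GB) ...'}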



def list_running_models() -> Dict[str, Any]:
    """List all models the system knows about + their load state.

    Use this when the user asks: "what models are installed?", "what
    language model is running?", "what TTS voice am I using?", "do I
    have vision?".
    """
    h = _get('/api/admin/models')
    if h.get('available') is False:
        return {'available': False, 'summary': h.get('reason', 'models API unreachable'), 'models': []}
    models = h.get('models') or h.get('data') or []
    loaded = [m for m in models if m.get('loaded') or m.get('status') == 'loaded']
    lines: List[str] = []
    lines.append(f"{len(models)} model(s) registered, {len(loaded)} currently loaded.")
    for m in loaded[:15]:
        name = m.get('name') or m.get('id') or '?'
        mtype = m.get('model_type') or m.get('type') or '?'
        lang = ','.join(m.get('lang_priority') or []) or 'any'
        lines.append(f"  - {name} ({mtype}, lang={lang})")
    return {
        'available': True,
        'total_count': len(models),
        'loaded_count': len(loaded),
        'loaded': loaded,
        'all': models,
        'summary': '\n'.join(lines),
    }


def get_tts_status() -> Dict[str, Any]:
    """Report the active TTS backend + language ladder + fallback chain.

    Use this when the user asks: "what voice am I using?", "why does
    Tamil sound weird?", "why is the voice robotic?", "which TTS
    engines are loaded?".
    """
    h = _get('/api/admin/tts/status')
    if h.get('available') is False:
        # Fallback: derive from /backend/health if the tts/status
        # endpoint isn't live yet.
        gh = _get('/backend/health')
        return {
            'available': False,
            'summary': (
                'TTS status endpoint unavailable. '
                + (f"GPU state: {_summarize_gpu(gh)}" if gh.get('available') is not False else '')
            ),
        }
    active = h.get('active_backend') or '?'
    lang = h.get('language') or h.get('preferred_lang') or 'en'
    ladder = h.get('ladder') or []
    return {
        'available': True,
        'active_backend': active,
        'language': lang,
        'ladder': ladder,
        'summary': (
            f"Active TTS backend: {active} for language '{lang}'. "
            f"Fallback ladder: {' → '.join(ladder) if ladder else '(unknown)'}."
        ),
    }


def get_tier_thresholds() -> Dict[str, Any]:
    """Return the canonical GPU tier threshold table.

    Use this when the user asks: "what's the difference between
    standard and full tier?", "what GPU do I need for speculation?",
    "at what VRAM does Indic Parler load?".
    """
    h = _get('/api/v1/system/tiers')
    if h.get('available') is False:
        # Static fallback — must mirror core/gpu_tier.py TIER_THRESHOLDS.
        return {
            'available': True,
            'source': 'fallback',
            'tiers': [
                {'name': 'ultra', 'min_vram_gb': 24, 'description': 'Every backend fits concurrently'},
                {'name': 'full', 'min_vram_gb': 10, 'description': 'Speculative decoding for all languages'},
                {'name': 'standard', 'min_vram_gb': 4, 'description': 'Heavy model only, cohort fast-path for English+Kokoro/Piper'},
                {'name': 'none', 'min_vram_gb': 0, 'description': 'CPU-only'},
            ],
            'summary': 'Tier thresholds: ultra≥24GB, full≥10GB, standard≥4GB, none<4GB.',
        }
    tiers = h.get('tiers') or []
    lines = ['GPU tier thresholds:']
    for t in tiers:
        lines.append(f"  - {t.get('name')}: ≥{t.get('min_vram_gb')}GB — {t.get('description', '')}")
    return {**h, 'summary': '\n'.join(lines)}


def get_boot_decision() -> Dict[str, Any]:
    """Report why the current draft-gate / speculation state was chosen.

    Reads the last line of `~/Documents/Nunba/logs/draft_decision.jsonl`
    (written by `LlamaConfig.should_boot_draft` — commit 12c9304).

    Use when the user asks: "why is speculation off on my 8GB GPU?",
    "why didn't Nunba load the draft model?", "what's the cohort
    fast-path?".
    """
    import json
    from pathlib import Path

    log_path = Path.home() / 'Documents' / 'Nunba' / 'logs' / 'draft_decision.jsonl'
    if not log_path.exists():
        return {
            'available': False,
            'summary': (
                "Draft decision log not yet written — this usually means "
                "Nunba has not been booted since the cohort-aware gate "
                "landed (commit 12c9304), or the log directory is missing."
            ),
        }
    try:
        with log_path.open(encoding='utf-8') as f:
            lines = [line for line in f if line.strip()]
        if not lines:
            return {'available': False, 'summary': 'Draft decision log empty.'}
        last = json.loads(lines[-1])
        return {
            'available': True,
            'decision': last.get('decision'),
            'reason': last.get('reason'),
            'lang': last.get('lang'),
            'vram_total_gb': last.get('vram_total_gb'),
            'vram_free_gb': last.get('vram_free_gb'),
            'active_tts': last.get('active_tts'),
            'ts': last.get('ts'),
            'summary': (
                f"Last boot decision (ts={last.get('ts')}): "
                f"{last.get('decision')} — reason: {last.get('reason')}. "
                f"Context: lang={last.get('lang')}, "
                f"VRAM={last.get('vram_total_gb')}GB total / "
                f"{last.get('vram_free_gb')}GB free, "
                f"active_tts={last.get('active_tts')}."
            ),
        }
    except Exception as e:
        return {
            'available': False,
            'summary': f"Could not parse draft decision log: {e!s}",
        }
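
# Each draft_decision.jsonl line is one JSON object per boot; the keys match
# those read above (the values here are made-up for illustration):
#   {"ts": "...", "decision": "off", "reason": "...", "lang": "ta",
#    "vram_total_gb": 8.0, "vram_free_gb": 6.5, "active_tts": "indic_parler"}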



# ═══════════════════════════════════════════════════════════════════
# Decision-logic RAG — agent reads its own code
# ═══════════════════════════════════════════════════════════════════
#
# Curated registry of "why did Nunba do X?" questions → the exact
# module + symbol that decides it. `explain_decision(topic)` uses
# inspect.getsource() to return the live source, so the agent can
# explain itself by quoting the actual rules, not a copy-pasted
# paraphrase that drifts over time.

_DECISION_REGISTRY: Dict[str, Dict[str, str]] = {
    'draft_gate': {
        'question': "Why did Nunba enable/disable the draft (0.8B) speculative-decoding model?",
        'module': 'llama.llama_config',
        'symbol': 'should_boot_draft',
        'description': (
            "Cohort-aware VRAM gate. ≥10GB → always on. 8-10GB → on "
            "ONLY when lang=en AND active_tts ∈ {kokoro,piper} (the "
            "cohort fast-path, commit 12c9304). <8GB or Indic/heavy-TTS "
            "users in the 8-10GB band → off so voice has VRAM headroom."
        ),
    },
    'tts_lang_ladder': {
        'question': "Which TTS engine does Nunba pick for a given language?",
        'module': 'tts.tts_engine',
        'symbol': '_FALLBACK_LANG_ENGINE_PREFERENCE',
        'description': (
            "Per-language engine preference list. Engine chosen by "
            "walking the list and taking the first one that fits in "
            "current VRAM. English: Chatterbox Turbo → F5 → Indic "
            "Parler → Kokoro → Piper. Indic langs: Indic Parler only."
        ),
    },
    'tts_lang_capability': {
        'question': "Which backends can actually speak a given language (vs mumble the wrong phonemes)?",
        'module': 'tts.tts_engine',
        'symbol': '_LANG_CAPABLE_BACKENDS',
        'description': (
            "Safety allowlist: if no capable backend fits, synth returns "
            "None and publishes com.hertzai.hevolve.tts.lang_unsupported "
            "instead of letting CosyVoice mumble Tamil in English "
            "phonemes (commit 9064554)."
        ),
    },
    'gpu_tier_thresholds': {
        'question': "What GPU VRAM tiers does Nunba recognize?",
        'module': 'core.gpu_tier',
        'symbol': 'TIER_THRESHOLDS',
        'description': (
            "Canonical tier table. ultra≥24GB, full≥10GB, standard≥4GB, "
            "none<4GB. Single source of truth consumed by backend "
            "/backend/health AND frontend GpuTierBadge via /api/v1/"
            "system/tiers (architect refactor 57e820b)."
        ),
    },
    'mcp_auth': {
        'question': "How does Nunba authenticate MCP /api/mcp/local requests?",
        'module': 'integrations.mcp.mcp_http_bridge',
        'symbol': '_mcp_auth_gate',
        'description': (
            "Bearer token from %LOCALAPPDATA%/Nunba/mcp.token (or "
            "HARTOS_MCP_TOKEN env). /health open, /tools/list loopback-"
            "ok, /tools/execute requires bearer. "
            "HARTOS_MCP_DISABLE_AUTH=1 bypasses for air-gapped deploys "
            "(commits f5b99d8, 49d829d)."
        ),
    },
    'hf_install_gates': {
        'question': "Why did Nunba reject an HF model install?",
        'module': 'main',
        'symbol': 'admin_models_hub_install',
        'description': (
            "4 supply-chain gates: (1) NFKC-normalize hf_id + reject "
            "non-ASCII (homoglyph defense), (2) trusted-org allowlist "
            "(unknown orgs need confirm_unverified=true), (3) 5s timeout "
            "on list_repo_files, (4) reject pickle-only repos — require "
            "a safetensors variant (commits 7b0e312, 86c44aa, 48d6752)."
        ),
    },
    'hub_allowlist': {
        'question': "Which HF organizations does Nunba trust by default?",
        'module': 'core.hub_allowlist',
        'symbol': 'HubAllowlist',
        'description': (
            "Runtime-editable list at ~/.nunba/hub_allowlist.json. "
            "Default seeded from code (google, microsoft, Qwen, "
            "ai4bharat, etc.). Admin API: GET/POST/DELETE "
            "/api/admin/hub/allowlist (architect refactor 48d6752)."
        ),
    },
    'vram_swap': {
        'question': "Why did Nunba evict an idle model?",
        'module': 'integrations.service_tools.model_lifecycle',
        'symbol': 'request_swap',
        'description': (
            "Pressure eviction: when the requested model can't fit, "
            "evict the oldest idle non-ACTIVE non-LLM worker and retry "
            "the load. Guards against evicting pinned models (draft "
            "0.8B) and active inferences (commit fe45daf)."
        ),
    },
    'language_detection': {
        'question': "How does Nunba decide what language the user is speaking?",
        'module': 'hart_intelligence_entry',
        'symbol': '_read_preferred_lang',
        'description': (
            "Reads ~/Documents/Nunba/data/hart_language.json written by "
            "the frontend language selector. Falls back to 'en' if "
            "missing. Passed through to whisper.transcribe(language=) "
            "on the STT path so short Tamil utterances aren't misrouted "
            "as English (commit 07da0fb)."
        ),
    },
    'watchdog': {
        'question': "What happens if a HARTOS daemon freezes?",
        'module': 'security.node_watchdog',
        'symbol': 'NodeWatchdog',
        'description': (
            "Per-thread heartbeat monitor. If a heartbeat is missed for "
            ">threshold (default 300s), dumps all thread stacks via "
            "core.diag, marks the thread 'frozen', and restarts it. Caps "
            "at 5 restarts in 5min, then marks it dormant (commit eb05d0f)."
        ),
    },
    'wamp_lifecycle': {
        'question': "When does Nunba start the WAMP router?",
        'module': 'wamp_router',
        'symbol': 'ensure_wamp_running',
        'description': (
            "Deferred start: NOT at boot (saves ~100MB). Started on the "
            "first non-web channel activation OR the first peer upgrade. "
            "Protected by a threading.Lock so concurrent ensure calls "
            "can't double-start (commits 48854dc, 852f4ac, 1a8c8e6)."
        ),
    },
}
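
# Adding a topic is one dict entry (hypothetical example: 'stt_routing' and
# its module/symbol are illustrative, not real code paths):
#   _DECISION_REGISTRY['stt_routing'] = {
#       'question': "How does Nunba route audio to Whisper?",
#       'module': 'stt.stt_router',    # assumed module
#       'symbol': 'route_utterance',   # assumed symbol
#       'description': "One-line rationale quoted when the import fails.",
#   }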



def explain_decision(topic: str = '') -> Dict[str, Any]:
    """Return the SOURCE CODE of a decision-making function/variable so
    the agent can explain itself by quoting the live rules, not a stale
    paraphrase.

    Use this when the user asks "why did you do X?", "explain your
    reasoning for Y", "show me the logic that decides Z". Covers the
    major decision points: draft gate, TTS ladder, GPU tiers, MCP
    auth, HF gates, VRAM eviction, language detection, watchdog, WAMP.

    If `topic` matches a known key (see list_decisions), returns the
    full source. If `topic` is empty, returns the list of all topics
    so the agent can pick the right one for the user's question.
    """
    import importlib
    import inspect

    topic = (topic or '').strip().lower()
    if not topic:
        return {
            'available': True,
            'topics': sorted(_DECISION_REGISTRY.keys()),
            'summary': (
                'Known decision topics: '
                + ', '.join(sorted(_DECISION_REGISTRY.keys()))
                + '. Call explain_decision(topic=<name>) to get the '
                'source code + rationale.'
            ),
        }

    # Exact match first, then fuzzy prefix/substring match.
    entry = _DECISION_REGISTRY.get(topic)
    if not entry:
        for k in _DECISION_REGISTRY:
            if k.startswith(topic) or topic in k:
                entry = _DECISION_REGISTRY[k]
                topic = k
                break
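    # e.g. topic='draft' resolves to 'draft_gate'; topic='tts' matches the
    # first registry key containing 'tts', in insertion order.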


    if not entry:
        return {
            'available': False,
            'summary': (
                f"Unknown decision topic '{topic}'. Known topics: "
                + ', '.join(sorted(_DECISION_REGISTRY.keys()))
            ),
        }

    # Try to import the module and grab the source.
    try:
        mod = importlib.import_module(entry['module'])
        obj = getattr(mod, entry['symbol'], None)
        if obj is None:
            return {
                'available': False,
                'question': entry['question'],
                'description': entry['description'],
                'module': entry['module'],
                'symbol': entry['symbol'],
                'summary': (
                    f"Module '{entry['module']}' imported but symbol "
                    f"'{entry['symbol']}' not found. Description: "
                    + entry['description']
                ),
            }
        try:
            src = inspect.getsource(obj)
        except (OSError, TypeError):
            # Variable (not a function) — render its repr instead.
            src = f"{entry['symbol']} = {obj!r}"
        # Truncate to keep the LLM context usable.
        if len(src) > 4000:
            src = src[:4000] + '\n... (truncated)'
        return {
            'available': True,
            'topic': topic,
            'question': entry['question'],
            'description': entry['description'],
            'module': entry['module'],
            'symbol': entry['symbol'],
            'source': src,
            'summary': (
                f"{entry['question']}\n\n"
                f"Rationale: {entry['description']}\n\n"
                f"Source ({entry['module']}.{entry['symbol']}):\n{src}"
            ),
        }
    except ImportError as e:
        return {
            'available': False,
            'question': entry['question'],
            'description': entry['description'],
            'summary': (
                f"Cannot import {entry['module']}: {e!s}. "
                f"Rationale-only: {entry['description']}"
            ),
        }

518 

519def list_decisions() -> Dict[str, Any]: 

520 """List all decision topics the agent can explain via 

521 explain_decision(). Use this when the user asks a general "how 

522 does Nunba decide X?" question and you need to pick the right 

523 topic.""" 

524 return { 

525 'available': True, 

526 'topics': [ 

527 {'name': k, 'question': v['question'], 'description': v['description']} 

528 for k, v in sorted(_DECISION_REGISTRY.items()) 

529 ], 

530 'summary': ( 

531 f"{len(_DECISION_REGISTRY)} decision topic(s):\n" 

532 + '\n'.join( 

533 f" - {k}: {v['question']}" 

534 for k, v in sorted(_DECISION_REGISTRY.items()) 

535 ) 

536 ), 

537 } 

538 

539 

540def get_system_health() -> Dict[str, Any]: 

541 """Top-level system health — combines GPU tier + Flask liveness. 

542 

543 Use when the user asks: "is Nunba healthy?", "what's broken?", 

544 "can you diagnose why X isn't working?". 

545 """ 

546 gpu = get_gpu_tier() 

547 hb = _get('/health') 

548 flask_ok = hb.get('available') is not False 

549 parts = [f"Nunba Flask: {'up' if flask_ok else 'down'}."] 

550 parts.append(gpu.get('summary', '')) 

551 # If we can, add counts 

552 models = _get('/api/admin/models') 

553 if models.get('available') is not False: 

554 m = models.get('models') or models.get('data') or [] 

555 loaded = [x for x in m if x.get('loaded') or x.get('status') == 'loaded'] 

556 parts.append(f"{len(m)} models registered, {len(loaded)} loaded.") 

557 return { 

558 'flask_ok': flask_ok, 

559 'gpu': gpu, 

560 'summary': ' '.join(p for p in parts if p), 

561 } 

562 

563 

564# ═══════════════════════════════════════════════════════════════════ 

565# Registry + dual-loader 

566# ═══════════════════════════════════════════════════════════════════ 

567 

568_TOOL_FUNCTIONS: List[Callable[..., Dict[str, Any]]] = [ 

569 get_gpu_tier, 

570 list_running_models, 

571 get_tts_status, 

572 get_tier_thresholds, 

573 get_boot_decision, 

574 get_system_health, 

575 list_decisions, # agent picks the right topic for user's "why" question 

576 explain_decision, # agent reads its own source code (code-RAG) 

577] 
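
# Extension sketch (hypothetical: `get_wamp_status` and the endpoint it
# queries are illustrative, not shipped code):
#
#   def get_wamp_status() -> Dict[str, Any]:
#       """Report whether the WAMP router is running."""
#       h = _get('/api/admin/wamp/status')  # assumed endpoint
#       up = h.get('available') is not False and bool(h.get('running'))
#       return {**h, 'summary': f"WAMP router: {'up' if up else 'down or not started'}."}
#
#   _TOOL_FUNCTIONS.append(get_wamp_status)  # both loaders pick it up automatically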



def get_tool_functions() -> List[Callable[..., Dict[str, Any]]]:
    """Canonical list for anyone who wants the raw functions."""
    return list(_TOOL_FUNCTIONS)


def get_langchain_tools() -> List[Any]:
    """Wrap each introspect function as a LangChain Tool.

    Returns `[]` if langchain isn't importable — a no-op for non-agent
    deployments. Each Tool's `name` matches the function name so agent
    prompts can reference them directly ("call get_gpu_tier").
    """
    try:
        from langchain_core.tools import Tool  # noqa: F401 — feature-detection probe
    except ImportError:
        try:
            from langchain.agents import Tool  # type: ignore # noqa: F401
        except ImportError:
            logger.debug("langchain not importable — system_introspect tools unavailable")
            return []

    import inspect as _inspect

    from core.constants import TOOL_LABELS  # hoisted out of the loop below

    tools = []
    for fn in _TOOL_FUNCTIONS:
        # Functions that take a real argument (explain_decision(topic))
        # get the LangChain string passed through as the first positional
        # arg. Argless functions ignore it. Introspection-based, so
        # future additions auto-pick the right calling convention.
        _sig = _inspect.signature(fn)
        _takes_arg = any(
            p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)
            and p.default is not p.empty
            for p in _sig.parameters.values()
        )
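        # e.g. explain_decision(topic: str = '') -> _takes_arg is True;
        #      get_gpu_tier() has no parameters  -> _takes_arg is False.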


        def _make_runner(_fn: Callable, _takes: bool) -> Callable[[str], str]:
            def _run(query: str = '') -> str:
                result = _fn(query) if _takes and query else _fn()
                return result.get('summary') or str(result)
            _run.__name__ = _fn.__name__
            return _run

        tools.append(labeled_tool(
            name=fn.__name__,
            func=_make_runner(fn, _takes_arg),
            description=(fn.__doc__ or fn.__name__).strip().split('\n')[0],
            ui_label=TOOL_LABELS.get(fn.__name__, f"Checking {fn.__name__}…"),
        ))
    return tools



def register_autogen(assistant_agent: Any, user_proxy_agent: Any) -> int:
    """Register every introspect function with an autogen agent pair.

    Routed through `core.labeled_autogen_function.register_labeled_function`
    so each invocation emits a `publish_chat_stage('tool_call', …)` UI
    status — the same chokepoint LangChain tools traverse via
    `_with_tool_logging`.

    Returns the count registered. Silent no-op if the wrapper isn't
    importable (autogen missing or core not on sys.path).
    """
    try:
        from core.constants import TOOL_LABELS
        from core.labeled_autogen_function import register_labeled_function
    except ImportError:
        logger.warning(
            "core.labeled_autogen_function unavailable — "
            "system_introspect autogen tools NOT registered",
            exc_info=True,
        )
        return 0
    count = 0
    for fn in _TOOL_FUNCTIONS:
        try:
            register_labeled_function(
                fn,
                caller=assistant_agent,
                executor=user_proxy_agent,
                name=fn.__name__,
                description=(fn.__doc__ or fn.__name__).strip().split('\n')[0],
                ui_label=TOOL_LABELS.get(
                    fn.__name__, f"Checking {fn.__name__}…"
                ),
            )
            count += 1
        except Exception:
            logger.warning(
                "register_labeled_function failed for %s",
                fn.__name__, exc_info=True,
            )
    return count


__all__ = [
    'get_gpu_tier',
    'list_running_models',
    'get_tts_status',
    'get_tier_thresholds',
    'get_boot_decision',
    'get_system_health',
    'list_decisions',
    'explain_decision',
    'get_tool_functions',
    'get_langchain_tools',
    'register_autogen',
]
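
# Typical wiring (sketch; `assistant` and `proxy` stand in for pre-built
# AutoGen agents, and the surrounding agent setup is elided):
#   tools = get_langchain_tools()            # [] when langchain is absent
#   n = register_autogen(assistant, proxy)   # 0 when the wrapper is absent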