Coverage for core/platform/ai_capabilities.py: 100.0%
98 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2AI Capability Intents — Declarative AI for every app in HART OS.
4Apps declare what AI they need; the OS provides it. No app bundles llama.cpp.
5This is the abstraction that makes HART OS fundamentally different from
6Windows/Linux/macOS.
8Usage in AppManifest:
9 AppManifest(
10 id='translator',
11 ai_capabilities=[
12 AICapability(type='llm', min_accuracy=0.7).to_dict(),
13 AICapability(type='tts', required=False).to_dict(),
14 ],
15 )
17Resolution:
18 router = CapabilityRouter(model_registry, vram_manager)
19 result = router.resolve(AICapability(type='llm'))
20 # -> ResolvedCapability(model_id='qwen3.5-4b-local', backend='local_llm', ...)
21"""
23import logging
24from dataclasses import dataclass, field
25from enum import Enum
26from typing import Any, Dict, List, Optional
28logger = logging.getLogger('hevolve.platform')
31# ─── Capability Types ───────────────────────────────────────────
class AICapabilityType(Enum):
    """All AI capability types the OS can provide.

    The string values are what apps place in AICapability.type; they also
    match the keys of _CAPABILITY_MODEL_MAP below.
    """
    LLM = 'llm'              # text generation / chat
    VISION = 'vision'        # image understanding
    TTS = 'tts'              # text-to-speech
    STT = 'stt'              # speech-to-text
    IMAGE_GEN = 'image_gen'  # image generation
    EMBEDDING = 'embedding'  # vector embeddings (mapped to 'llm' models below)
    CODE = 'code'            # code generation (mapped to 'llm' models below)
44# ─── Capability Declaration ─────────────────────────────────────
@dataclass
class AICapability:
    """Declarative request for a single AI capability from the OS.

    Apps state WHAT they need, never HOW to provide it; the
    CapabilityRouter maps this intent onto a concrete model backend.
    """
    type: str                     # AICapabilityType value, e.g. 'llm'
    required: bool = True         # False = optional nice-to-have
    local_only: bool = False      # never route to a cloud backend
    min_accuracy: float = 0.0     # quality floor, 0.0-1.0
    max_latency_ms: float = 0.0   # 0 = unconstrained
    max_cost_spark: float = 0.0   # 0 = unconstrained
    options: Dict[str, Any] = field(default_factory=dict)

    # Serialization key order — kept stable for AppManifest storage.
    _KEYS = ('type', 'required', 'local_only', 'min_accuracy',
             'max_latency_ms', 'max_cost_spark', 'options')

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for AppManifest storage."""
        return {key: getattr(self, key) for key in self._KEYS}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AICapability':
        """Deserialize from dict, silently dropping unknown keys."""
        known = cls.__dataclass_fields__
        kwargs = {}
        for key, value in data.items():
            if key in known:
                kwargs[key] = value
        return cls(**kwargs)
80# ─── Resolved Capability ────────────────────────────────────────
@dataclass
class ResolvedCapability:
    """Outcome of resolving an AICapability to a concrete backend."""
    capability_type: str
    model_id: str
    backend: str                  # e.g. 'local_llm', 'mesh', 'cloud_tts'
    is_local: bool
    estimated_latency_ms: float
    estimated_cost_spark: float
    available: bool
    reason: str = ''              # explanation when available is False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses."""
        payload: Dict[str, Any] = {}
        for name in ('capability_type', 'model_id', 'backend', 'is_local',
                     'estimated_latency_ms', 'estimated_cost_spark',
                     'available', 'reason'):
            payload[name] = getattr(self, name)
        return payload
108# ─── Capability type → model type mapping ───────────────────────
# Maps an AICapability.type string to the model type used for registry
# lookups. 'embedding' and 'code' requests are served by general 'llm'
# models. NOTE(review): no consumer of this map is visible in this file —
# CapabilityRouter.resolve() does not filter by capability type; confirm
# that the ModelRegistry (or another caller) actually uses this mapping.
_CAPABILITY_MODEL_MAP: Dict[str, str] = {
    'llm': 'llm',
    'vision': 'vision',
    'tts': 'tts',
    'stt': 'stt',
    'image_gen': 'image_gen',
    'embedding': 'llm',  # no dedicated embedding backend — served by an LLM
    'code': 'llm',       # code models are a specialization of LLM
}
121# ─── Capability Router ──────────────────────────────────────────
class CapabilityRouter:
    """Resolves declarative AI capability intents to concrete model backends.

    Backend selection is delegated to the injected ModelRegistry; the
    VRAMManager is consulted for resource awareness. The router keeps no
    state of its own — its only side effects are best-effort event
    emissions.
    """

    def __init__(self, model_registry=None, vram_manager=None):
        # Both collaborators are optional; resolution degrades gracefully
        # to an "unavailable" result when the registry is missing.
        self._model_registry = model_registry
        self._vram_manager = vram_manager

    @staticmethod
    def _select_policy(capability: AICapability) -> str:
        """Map the capability's constraints onto a registry routing policy.

        - local_only, or a zero spark budget (the default) -> 'local_only'
        - min_accuracy >= 0.7 -> 'any' (cloud allowed for high quality)
        - otherwise -> 'local_preferred'
        """
        if capability.local_only or capability.max_cost_spark == 0:
            return 'local_only'
        if capability.min_accuracy >= 0.7:
            return 'any'
        return 'local_preferred'

    def resolve(self, capability: AICapability) -> ResolvedCapability:
        """Resolve a single capability to the best available backend.

        Returns an available ResolvedCapability on success, or an
        unavailable one (with ``reason`` set) when no backend satisfies
        the declared constraints. Emits ``capability.resolved`` or
        ``capability.unavailable`` accordingly.
        """
        cap_type = capability.type

        def fail(reason: str) -> ResolvedCapability:
            # Shared failure path: emit the event, return a stub result.
            self._emit_unavailable(cap_type, reason)
            return ResolvedCapability(
                capability_type=cap_type, model_id='', backend='',
                is_local=False, estimated_latency_ms=0,
                estimated_cost_spark=0, available=False, reason=reason,
            )

        if not self._model_registry:
            return fail('no model registry available')

        policy = self._select_policy(capability)

        # Ask the registry for the best candidate; treat any registry
        # error the same as "no model found".
        try:
            candidate = self._model_registry.get_model_by_policy(
                policy=policy,
                min_accuracy=capability.min_accuracy,
            )
        except Exception:
            candidate = None

        if not candidate:
            return fail(f'no {cap_type} model matching policy={policy}')

        # Latency constraint (0 means unconstrained).
        if (capability.max_latency_ms > 0 and
                candidate.avg_latency_ms > capability.max_latency_ms):
            return fail(
                f'best model {candidate.model_id} latency '
                f'{candidate.avg_latency_ms}ms '
                f'exceeds max {capability.max_latency_ms}ms'
            )

        # Cost constraint (0 means unconstrained — already forced the
        # 'local_only' policy above).
        if (capability.max_cost_spark > 0 and
                candidate.cost_per_1k_tokens > capability.max_cost_spark):
            return fail(
                f'model {candidate.model_id} cost '
                f'{candidate.cost_per_1k_tokens} '
                f'exceeds max {capability.max_cost_spark}'
            )

        if candidate.is_local:
            self._warn_low_vram(candidate)

        resolved = ResolvedCapability(
            capability_type=cap_type,
            model_id=candidate.model_id,
            backend='local' if candidate.is_local else 'cloud',
            is_local=candidate.is_local,
            estimated_latency_ms=candidate.avg_latency_ms,
            estimated_cost_spark=candidate.cost_per_1k_tokens,
            available=True,
        )
        self._emit_resolved(cap_type, resolved)
        return resolved

    def _warn_low_vram(self, model) -> None:
        """Best-effort debug note when local VRAM headroom looks tight.

        Low VRAM does not make the model unavailable — it may still run
        with CPU offload.
        """
        if not self._vram_manager:
            return
        try:
            gpu_info = self._vram_manager.detect_gpu()
            if gpu_info and gpu_info.get('free_mb', 0) < 500:
                logger.debug("Low VRAM for %s, model may use CPU offload",
                             model.model_id)
        except Exception:
            pass  # detection failure must never block resolution

    def resolve_all(self, capabilities: List[AICapability]) -> List[ResolvedCapability]:
        """Resolve all capabilities for an app, in declaration order."""
        return list(map(self.resolve, capabilities))

    def can_satisfy(self, capabilities: List[AICapability]) -> bool:
        """Check whether every *required* capability can be satisfied.

        Optional (required=False) capabilities never block installation.
        """
        needed = (cap for cap in capabilities if cap.required)
        return all(self.resolve(cap).available for cap in needed)

    def _emit_resolved(self, cap_type: str, resolved: ResolvedCapability) -> None:
        """Emit capability.resolved event (best-effort; never raises)."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.resolved', {
                'type': cap_type,
                'model_id': resolved.model_id,
                'is_local': resolved.is_local,
            })
        except Exception:
            pass  # event bus may be absent — resolution still succeeds

    def _emit_unavailable(self, cap_type: str, reason: str) -> None:
        """Emit capability.unavailable event (best-effort; never raises)."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.unavailable', {
                'type': cap_type,
                'reason': reason,
            })
        except Exception:
            pass  # event bus may be absent — failure result still returned

    def health(self) -> dict:
        """Health report for ServiceRegistry."""
        return dict(
            status='ok',
            has_model_registry=self._model_registry is not None,
            has_vram_manager=self._vram_manager is not None,
        )