Coverage for core / platform / ai_capabilities.py: 100.0%

98 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2AI Capability Intents — Declarative AI for every app in HART OS. 

3 

4Apps declare what AI they need; the OS provides it. No app bundles llama.cpp. 

5This is the abstraction that makes HART OS fundamentally different from 

6Windows/Linux/macOS. 

7 

8Usage in AppManifest: 

9 AppManifest( 

10 id='translator', 

11 ai_capabilities=[ 

12 AICapability(type='llm', min_accuracy=0.7).to_dict(), 

13 AICapability(type='tts', required=False).to_dict(), 

14 ], 

15 ) 

16 

17Resolution: 

18 router = CapabilityRouter(model_registry, vram_manager) 

19 result = router.resolve(AICapability(type='llm')) 

20 # -> ResolvedCapability(model_id='qwen3.5-4b-local', backend='local_llm', ...) 

21""" 

22 

23import logging 

24from dataclasses import dataclass, field 

25from enum import Enum 

26from typing import Any, Dict, List, Optional 

27 

28logger = logging.getLogger('hevolve.platform') 

29 

30 

31# ─── Capability Types ─────────────────────────────────────────── 

32 

class AICapabilityType(Enum):
    """Closed set of AI capability kinds the OS can provide to apps.

    The string values are what apps put in ``AICapability.type`` and in
    their manifests; keep them stable.
    """

    LLM = 'llm'
    VISION = 'vision'
    TTS = 'tts'
    STT = 'stt'
    IMAGE_GEN = 'image_gen'
    EMBEDDING = 'embedding'
    CODE = 'code'

42 

43 

44# ─── Capability Declaration ───────────────────────────────────── 

45 

@dataclass
class AICapability:
    """Declarative statement of one AI capability an app needs from the OS.

    Apps describe WHAT they need, never HOW to provide it; the
    CapabilityRouter resolves the declaration to a concrete model backend.
    """
    type: str                      # an AICapabilityType value
    required: bool = True          # False marks a nice-to-have
    local_only: bool = False       # never route to cloud
    min_accuracy: float = 0.0      # quality floor, 0.0-1.0
    max_latency_ms: float = 0.0    # 0 = no constraint
    max_cost_spark: float = 0.0    # 0 = no constraint
    options: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for AppManifest storage.

        Emits one entry per declared field, in declaration order.
        """
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AICapability':
        """Deserialize from a dict, silently dropping unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in data.items() if key in known})

78 

79 

80# ─── Resolved Capability ──────────────────────────────────────── 

81 

@dataclass
class ResolvedCapability:
    """Concrete backend chosen (or refused) for one AICapability request."""
    capability_type: str
    model_id: str
    backend: str                   # e.g. 'local_llm', 'mesh', 'cloud_tts'
    is_local: bool
    estimated_latency_ms: float
    estimated_cost_spark: float
    available: bool
    reason: str = ''               # explanation when available is False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses, one entry per field in declaration order."""
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

106 

107 

108# ─── Capability type → model type mapping ─────────────────────── 

109 

# Capability type -> ModelRegistry model type.  Most capabilities map 1:1
# onto a registry model type; embedding and code requests are served by
# general-purpose LLM backends.
_CAPABILITY_MODEL_MAP = {
    t: t for t in ('llm', 'vision', 'tts', 'stt', 'image_gen')
}
_CAPABILITY_MODEL_MAP.update(embedding='llm', code='llm')

119 

120 

121# ─── Capability Router ────────────────────────────────────────── 

122 

class CapabilityRouter:
    """Resolves AI capability intents to concrete model backends.

    Uses the existing ModelRegistry for backend selection and the
    VRAMManager for resource-aware routing.  Pure resolver — no state,
    no side effects beyond best-effort event emission.

    NOTE(review): resolution currently ignores the capability type when
    querying the registry (``_CAPABILITY_MODEL_MAP`` is not consulted
    here), so e.g. a 'tts' request resolves to whatever the policy query
    returns — confirm ``get_model_by_policy`` filters by model type, or
    thread the mapped type through the query.
    """

    def __init__(self, model_registry=None, vram_manager=None):
        # Both collaborators are optional; resolve() degrades gracefully
        # to an "unavailable" result when the registry is missing.
        self._model_registry = model_registry
        self._vram_manager = vram_manager

    def resolve(self, capability: AICapability) -> ResolvedCapability:
        """Resolve a single capability to the best available backend.

        Maps capability constraints to ModelRegistry.get_model_by_policy():
          - local_only          -> policy='local_only'
          - max_cost_spark == 0 -> policy='local_only' (free models only)
          - min_accuracy >= 0.7 -> policy='any' (allow cloud for quality)
          - otherwise           -> policy='local_preferred'

        Never raises: when no backend satisfies the constraints, returns
        a ResolvedCapability with ``available=False`` and ``reason`` set,
        after emitting a 'capability.unavailable' event.
        """
        cap_type = capability.type

        if not self._model_registry:
            return self._unavailable(cap_type, 'no model registry available')

        policy = self._select_policy(capability)

        model = self._query_registry(policy, capability.min_accuracy)
        if not model:
            return self._unavailable(
                cap_type, f'no {cap_type} model matching policy={policy}')

        reason = self._constraint_violation(capability, model)
        if reason:
            return self._unavailable(cap_type, reason)

        # VRAM pressure is informational only; it never blocks resolution.
        self._note_vram_pressure(model)

        resolved = ResolvedCapability(
            capability_type=cap_type,
            model_id=model.model_id,
            backend='local' if model.is_local else 'cloud',
            is_local=model.is_local,
            estimated_latency_ms=model.avg_latency_ms,
            estimated_cost_spark=model.cost_per_1k_tokens,
            available=True,
        )
        self._emit_resolved(cap_type, resolved)
        return resolved

    # ─── resolve() helpers ────────────────────────────────────────

    @staticmethod
    def _select_policy(capability: AICapability) -> str:
        """Map capability constraints to a registry routing policy.

        Note the quirk: a zero ``max_cost_spark`` (the dataclass default)
        forces local-only routing, so the 'any' branch is reachable only
        for capabilities that declare a positive spark budget.
        """
        if capability.local_only or capability.max_cost_spark == 0:
            return 'local_only'
        if capability.min_accuracy >= 0.7:
            return 'any'  # allow cloud for high quality
        return 'local_preferred'

    def _query_registry(self, policy: str, min_accuracy: float):
        """Best-effort registry lookup; returns None on any failure.

        Registry errors are treated as "no model found" so resolution
        stays non-raising, but they are logged instead of silently lost.
        """
        try:
            return self._model_registry.get_model_by_policy(
                policy=policy,
                min_accuracy=min_accuracy,
            )
        except Exception:
            logger.debug('model registry lookup failed (policy=%s)', policy,
                         exc_info=True)
            return None

    @staticmethod
    def _constraint_violation(capability: AICapability, model) -> str:
        """Return a human-readable reason if *model* breaks a hard constraint.

        Returns '' when latency and cost constraints are satisfied.
        A zero constraint value means "unconstrained" for both checks.
        """
        if (capability.max_latency_ms > 0 and
                model.avg_latency_ms > capability.max_latency_ms):
            return (
                f'best model {model.model_id} latency {model.avg_latency_ms}ms '
                f'exceeds max {capability.max_latency_ms}ms'
            )
        if (capability.max_cost_spark > 0 and
                model.cost_per_1k_tokens > capability.max_cost_spark):
            return (
                f'model {model.model_id} cost {model.cost_per_1k_tokens} '
                f'exceeds max {capability.max_cost_spark}'
            )
        return ''

    def _note_vram_pressure(self, model) -> None:
        """Log (but never block on) low free VRAM for a local model."""
        if not (model.is_local and self._vram_manager):
            return
        try:
            gpu = self._vram_manager.detect_gpu()
            if gpu and gpu.get('free_mb', 0) < 500:
                # Low VRAM — model stays available; backend may CPU-offload.
                logger.debug('Low VRAM for %s, model may use CPU offload',
                             model.model_id)
        except Exception:
            # GPU detection is strictly best-effort.
            logger.debug('GPU detection failed', exc_info=True)

    def _unavailable(self, cap_type: str, reason: str) -> ResolvedCapability:
        """Build an unavailable result and emit the matching event."""
        self._emit_unavailable(cap_type, reason)
        return ResolvedCapability(
            capability_type=cap_type, model_id='', backend='',
            is_local=False, estimated_latency_ms=0, estimated_cost_spark=0,
            available=False, reason=reason,
        )

    # ─── batch helpers ────────────────────────────────────────────

    def resolve_all(self, capabilities: List[AICapability]) -> List[ResolvedCapability]:
        """Resolve all capabilities for an app, in declaration order."""
        return [self.resolve(cap) for cap in capabilities]

    def can_satisfy(self, capabilities: List[AICapability]) -> bool:
        """Check whether every *required* capability can be satisfied.

        Optional (required=False) capabilities are skipped entirely.
        Vacuously True for an empty list.
        """
        for cap in capabilities:
            if cap.required:
                resolved = self.resolve(cap)
                if not resolved.available:
                    return False
        return True

    # ─── event emission (best-effort) ─────────────────────────────

    def _emit_resolved(self, cap_type: str, resolved: ResolvedCapability) -> None:
        """Emit a 'capability.resolved' event; failures are swallowed."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.resolved', {
                'type': cap_type,
                'model_id': resolved.model_id,
                'is_local': resolved.is_local,
            })
        except Exception:
            # Event bus is optional infrastructure; never fail resolution.
            pass

    def _emit_unavailable(self, cap_type: str, reason: str) -> None:
        """Emit a 'capability.unavailable' event; failures are swallowed."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.unavailable', {
                'type': cap_type,
                'reason': reason,
            })
        except Exception:
            # Event bus is optional infrastructure; never fail resolution.
            pass

    def health(self) -> dict:
        """Health report for ServiceRegistry."""
        return {
            'status': 'ok',
            'has_model_registry': self._model_registry is not None,
            'has_vram_manager': self._vram_manager is not None,
        }