Coverage for core / platform / ai_capabilities.py: 100.0%

98 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2AI Capability Intents — Declarative AI for every app in HART OS. 

3 

4Apps declare what AI they need; the OS provides it. No app bundles llama.cpp. 

5This is the abstraction that makes HART OS fundamentally different from 

6Windows/Linux/macOS. 

7 

8Usage in AppManifest: 

9 AppManifest( 

10 id='translator', 

11 ai_capabilities=[ 

12 AICapability(type='llm', min_accuracy=0.7).to_dict(), 

13 AICapability(type='tts', required=False).to_dict(), 

14 ], 

15 ) 

16 

17Resolution: 

18 router = CapabilityRouter(model_registry, vram_manager) 

19 result = router.resolve(AICapability(type='llm')) 

20 # -> ResolvedCapability(model_id='qwen3.5-4b-local', backend='local_llm', ...) 

21""" 

22 

23import logging 

24from dataclasses import dataclass, field 

25from enum import Enum 

26from typing import Any, Dict, List, Optional 

27 

28logger = logging.getLogger('hevolve.platform') 

29 

30 

31# ─── Capability Types ─────────────────────────────────────────── 

32 

class AICapabilityType(Enum):
    """Closed set of AI capability kinds the OS can provide to apps.

    The string values are what apps put in ``AICapability.type`` and in
    their manifests; keep them stable.
    """

    LLM = 'llm'
    VISION = 'vision'
    TTS = 'tts'
    STT = 'stt'
    IMAGE_GEN = 'image_gen'
    EMBEDDING = 'embedding'
    CODE = 'code'

42 

43 

44# ─── Capability Declaration ───────────────────────────────────── 

45 

@dataclass
class AICapability:
    """Declarative statement of one AI capability an app needs from the OS.

    Apps describe WHAT they need, never HOW to provide it; the
    CapabilityRouter resolves the declaration to a concrete model backend.
    """
    type: str                      # an AICapabilityType value
    required: bool = True          # False marks a nice-to-have
    local_only: bool = False       # never route to cloud
    min_accuracy: float = 0.0      # quality floor, 0.0-1.0
    max_latency_ms: float = 0.0    # 0 = no constraint
    max_cost_spark: float = 0.0    # 0 = no constraint
    options: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for AppManifest storage.

        Emits one entry per declared field, in declaration order.
        """
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'AICapability':
        """Deserialize from a dict, silently dropping unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in data.items() if key in known})

78 

79 

80# ─── Resolved Capability ──────────────────────────────────────── 

81 

@dataclass
class ResolvedCapability:
    """Concrete backend chosen (or refused) for one AICapability request."""
    capability_type: str
    model_id: str
    backend: str                   # e.g. 'local_llm', 'mesh', 'cloud_tts'
    is_local: bool
    estimated_latency_ms: float
    estimated_cost_spark: float
    available: bool
    reason: str = ''               # explanation when available is False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for API responses, one entry per field in declaration order."""
        return {name: getattr(self, name) for name in self.__dataclass_fields__}

106 

107 

108# ─── Capability type → model type mapping ─────────────────────── 

109 

# Capability type -> ModelRegistry model type.  Most capabilities map 1:1
# onto a registry model type; embedding and code requests are served by
# general-purpose LLM backends.
_CAPABILITY_MODEL_MAP = {
    t: t for t in ('llm', 'vision', 'tts', 'stt', 'image_gen')
}
_CAPABILITY_MODEL_MAP.update(embedding='llm', code='llm')

119 

120 

121# ─── Capability Router ────────────────────────────────────────── 

122 

class CapabilityRouter:
    """Resolves AI capability intents to concrete model backends.

    Uses the existing ModelRegistry for backend selection and the
    VRAMManager for resource-aware routing.  Pure resolver — no state,
    no side effects beyond best-effort event emission.

    NOTE(review): resolution currently ignores the capability type when
    querying the registry (``_CAPABILITY_MODEL_MAP`` is not consulted
    here), so e.g. a 'tts' request resolves to whatever the policy query
    returns — confirm ``get_model_by_policy`` filters by model type, or
    thread the mapped type through the query.
    """

    def __init__(self, model_registry=None, vram_manager=None):
        # Both collaborators are optional; resolve() degrades gracefully
        # to an "unavailable" result when the registry is missing.
        self._model_registry = model_registry
        self._vram_manager = vram_manager

    def resolve(self, capability: AICapability) -> ResolvedCapability:
        """Resolve a single capability to the best available backend.

        Maps capability constraints to ModelRegistry.get_model_by_policy():
          - local_only          -> policy='local_only'
          - max_cost_spark == 0 -> policy='local_only' (free models only)
          - min_accuracy >= 0.7 -> policy='any' (allow cloud for quality)
          - otherwise           -> policy='local_preferred'

        Never raises: when no backend satisfies the constraints, returns
        a ResolvedCapability with ``available=False`` and ``reason`` set,
        after emitting a 'capability.unavailable' event.
        """
        cap_type = capability.type

        if not self._model_registry:
            return self._unavailable(cap_type, 'no model registry available')

        policy = self._select_policy(capability)

        model = self._query_registry(policy, capability.min_accuracy)
        if not model:
            return self._unavailable(
                cap_type, f'no {cap_type} model matching policy={policy}')

        reason = self._constraint_violation(capability, model)
        if reason:
            return self._unavailable(cap_type, reason)

        # VRAM pressure is informational only; it never blocks resolution.
        self._note_vram_pressure(model)

        resolved = ResolvedCapability(
            capability_type=cap_type,
            model_id=model.model_id,
            backend='local' if model.is_local else 'cloud',
            is_local=model.is_local,
            estimated_latency_ms=model.avg_latency_ms,
            estimated_cost_spark=model.cost_per_1k_tokens,
            available=True,
        )
        self._emit_resolved(cap_type, resolved)
        return resolved

    # ─── resolve() helpers ────────────────────────────────────────

    @staticmethod
    def _select_policy(capability: AICapability) -> str:
        """Map capability constraints to a registry routing policy.

        Note the quirk: a zero ``max_cost_spark`` (the dataclass default)
        forces local-only routing, so the 'any' branch is reachable only
        for capabilities that declare a positive spark budget.
        """
        if capability.local_only or capability.max_cost_spark == 0:
            return 'local_only'
        if capability.min_accuracy >= 0.7:
            return 'any'  # allow cloud for high quality
        return 'local_preferred'

    def _query_registry(self, policy: str, min_accuracy: float):
        """Best-effort registry lookup; returns None on any failure.

        Registry errors are treated as "no model found" so resolution
        stays non-raising, but they are logged instead of silently lost.
        """
        try:
            return self._model_registry.get_model_by_policy(
                policy=policy,
                min_accuracy=min_accuracy,
            )
        except Exception:
            logger.debug('model registry lookup failed (policy=%s)', policy,
                         exc_info=True)
            return None

    @staticmethod
    def _constraint_violation(capability: AICapability, model) -> str:
        """Return a human-readable reason if *model* breaks a hard constraint.

        Returns '' when latency and cost constraints are satisfied.
        A zero constraint value means "unconstrained" for both checks.
        """
        if (capability.max_latency_ms > 0 and
                model.avg_latency_ms > capability.max_latency_ms):
            return (
                f'best model {model.model_id} latency {model.avg_latency_ms}ms '
                f'exceeds max {capability.max_latency_ms}ms'
            )
        if (capability.max_cost_spark > 0 and
                model.cost_per_1k_tokens > capability.max_cost_spark):
            return (
                f'model {model.model_id} cost {model.cost_per_1k_tokens} '
                f'exceeds max {capability.max_cost_spark}'
            )
        return ''

    def _note_vram_pressure(self, model) -> None:
        """Log (but never block on) low free VRAM for a local model."""
        if not (model.is_local and self._vram_manager):
            return
        try:
            gpu = self._vram_manager.detect_gpu()
            if gpu and gpu.get('free_mb', 0) < 500:
                # Low VRAM — model stays available; backend may CPU-offload.
                logger.debug('Low VRAM for %s, model may use CPU offload',
                             model.model_id)
        except Exception:
            # GPU detection is strictly best-effort.
            logger.debug('GPU detection failed', exc_info=True)

    def _unavailable(self, cap_type: str, reason: str) -> ResolvedCapability:
        """Build an unavailable result and emit the matching event."""
        self._emit_unavailable(cap_type, reason)
        return ResolvedCapability(
            capability_type=cap_type, model_id='', backend='',
            is_local=False, estimated_latency_ms=0, estimated_cost_spark=0,
            available=False, reason=reason,
        )

    # ─── batch helpers ────────────────────────────────────────────

    def resolve_all(self, capabilities: List[AICapability]) -> List[ResolvedCapability]:
        """Resolve all capabilities for an app, in declaration order."""
        return [self.resolve(cap) for cap in capabilities]

    def can_satisfy(self, capabilities: List[AICapability]) -> bool:
        """Check whether every *required* capability can be satisfied.

        Optional (required=False) capabilities are skipped entirely.
        Vacuously True for an empty list.
        """
        for cap in capabilities:
            if cap.required:
                resolved = self.resolve(cap)
                if not resolved.available:
                    return False
        return True

    # ─── event emission (best-effort) ─────────────────────────────

    def _emit_resolved(self, cap_type: str, resolved: ResolvedCapability) -> None:
        """Emit a 'capability.resolved' event; failures are swallowed."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.resolved', {
                'type': cap_type,
                'model_id': resolved.model_id,
                'is_local': resolved.is_local,
            })
        except Exception:
            # Event bus is optional infrastructure; never fail resolution.
            pass

    def _emit_unavailable(self, cap_type: str, reason: str) -> None:
        """Emit a 'capability.unavailable' event; failures are swallowed."""
        try:
            from core.platform.events import emit_event
            emit_event('capability.unavailable', {
                'type': cap_type,
                'reason': reason,
            })
        except Exception:
            # Event bus is optional infrastructure; never fail resolution.
            pass

    def health(self) -> dict:
        """Health report for ServiceRegistry."""
        return {
            'status': 'ok',
            'has_model_registry': self._model_registry is not None,
            'has_vram_manager': self._vram_manager is not None,
        }