Coverage for core / resource_governor.py: 57.4%

610 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Resource Governor — HARTOS never slows down the host OS. 

3 

4Three modes: 

5 ACTIVE: User is busy -> HARTOS uses <5% CPU, no GPU, minimal RAM 

6 IDLE: User is idle -> HARTOS uses up to 50% CPU, available GPU, moderate RAM 

7 SLEEP: System on battery/low resources -> HARTOS suspends all non-essential work 

8 

9Enforcement layer (ResourceEnforcer): 

10 Hard OS-level caps so Nunba CANNOT exceed its budget, regardless of code bugs: 

11 - Windows: Job Object with CPU rate limit + memory limit on the process tree 

12 - Linux: cgroup v2 cpu.max + memory.max, fallback to RLIMIT_AS 

13 - macOS: RLIMIT_AS (soft cap) + process priority 

14 - GPU: CUDA_MPS_ACTIVE_THREAD_PERCENTAGE (NVIDIA MPS) or VRAM budget via lifecycle 

15 - Process priority: BELOW_NORMAL on Windows, nice +10 on POSIX 

16 

17 System buffer: always reserves 25% CPU, 2 GB RAM, 1 GB VRAM for the rest of the OS. 

18 Caps tighten/relax on mode transitions (ACTIVE → tight, IDLE → relaxed, SLEEP → minimal). 

19 

20Proactive Action Stream: 

21 When IDLE, periodically samples hive intelligence: 

22 - Check if any hive tasks match this node's capabilities 

23 - Pre-download popular models that users nearby are requesting 

24 - Run background benchmarks to optimize inference settings 

25 - Explore community signal feed for actionable insights 

26 

27All sampling is randomized (jitter) to avoid thundering herd across hive nodes. 

28 

29Integration: 

30 - Uses VRAMManager for GPU detection (vram_manager.detect_gpu()) 

31 - Reads model_lifecycle get_system_pressure() for existing throttle awareness 

32 - Emits EventBus events: 'resource.mode_changed', 'resource.proactive_action' 

33 - AgentDaemon and HiveTaskDispatcher check get_throttle() / should_proceed() 

34 - Lazy imports for all hive modules (zero startup cost) 

35 

36Singleton: get_governor() returns the module-level instance. 

37Convenience: should_proceed(resource) for quick checks from any module. 

38""" 

39 

40import ctypes 

41import logging 

42import os 

43import random 

44import struct 

45import sys 

46import threading 

47import time 

48from typing import Optional 

49 

logger = logging.getLogger(__name__)

# ═══════════════════════════════════════════════════════════════════════
# Constants
# ═══════════════════════════════════════════════════════════════════════

# Mode thresholds
IDLE_THRESHOLD_SECONDS = 120  # 2 min of no input -> idle
ACTIVE_CPU_LIMIT = 0.05  # 5% CPU max when user is active
IDLE_CPU_LIMIT = 0.50  # 50% CPU max when idle
SLEEP_CPU_LIMIT = 0.0  # Nothing when sleeping

# ── System buffer — always reserved for the rest of the OS ──
SYSTEM_BUFFER_CPU_FRACTION = 0.25  # Reserve 25% of CPU cores for OS
SYSTEM_BUFFER_RAM_GB = 2.0  # Reserve 2 GB RAM for OS
SYSTEM_BUFFER_VRAM_GB = 1.0  # Reserve 1 GB VRAM for other apps

# Proactive action intervals (seconds) — randomized +/-50% via _jitter()
SIGNAL_CHECK_INTERVAL = 60  # Check hive signals
TASK_CHECK_INTERVAL = 600  # Check pending tasks
MODEL_PREFETCH_INTERVAL = 1800  # Pre-download models
BENCHMARK_INTERVAL = 3600  # Run benchmarks

# Battery thresholds (fraction of full charge, 0.0-1.0)
BATTERY_SLEEP_THRESHOLD = 0.20  # Force sleep below 20%
BATTERY_THROTTLE_THRESHOLD = 0.40  # Reduce activity below 40%

# Monitor loop interval (seconds between governor ticks)
_MONITOR_INTERVAL_SECONDS = 5

# Mode constants
MODE_ACTIVE = 'active'
MODE_IDLE = 'idle'
MODE_SLEEP = 'sleep'

84 

85 

86# ═══════════════════════════════════════════════════════════════════════ 

87# ResourceEnforcer — hard OS-level process caps 

88# ═══════════════════════════════════════════════════════════════════════ 

89 

class ResourceEnforcer:
    """Enforce hard resource caps at the OS level.

    The governor ADVISES. The enforcer CONSTRAINS.
    Even buggy code cannot exceed the caps set here.

    Platform-specific mechanisms:
        Windows: Job Object (CPU rate limit + memory limit on process tree)
        Linux: cgroup v2 (cpu.max + memory.max), fallback RLIMIT_AS
        macOS: RLIMIT_AS (soft cap) + process priority

    Process priority: BELOW_NORMAL on Windows, nice +10 on POSIX.
    This alone prevents Nunba from competing with foreground apps.
    """

    def __init__(self):
        self._enforced = False        # set True after enforce() completes
        self._job_handle = None       # Windows Job Object handle
        self._cgroup_path = None      # Linux cgroup v2 directory path
        self._original_nice = None    # POSIX nice value before adjustment

    def enforce(self, cpu_fraction: float = 0.75, ram_fraction: float = 0.75,
                gpu_fraction: float = 0.75):
        """Apply hard caps. Call once at startup (subsequent calls are no-ops).

        Args:
            cpu_fraction: Max fraction of total CPU Nunba can use (0.75 = 75%)
            ram_fraction: Max fraction of total RAM Nunba can use
            gpu_fraction: Max fraction of GPU Nunba can use (advisory for CUDA)
        """
        if self._enforced:
            return

        # Always subtract the system buffer from the requested fraction so
        # the rest of the OS keeps guaranteed headroom (never below 0.5 GB
        # RAM / 1 core for Nunba itself).
        total_ram_gb = self._get_total_ram_gb()
        usable_ram_gb = max(0.5, total_ram_gb * ram_fraction - SYSTEM_BUFFER_RAM_GB)

        total_cores = os.cpu_count() or 4
        usable_cores = max(1, int(total_cores * (cpu_fraction - SYSTEM_BUFFER_CPU_FRACTION)))

        logger.info("ResourceEnforcer: total_ram=%.1fGB, cap=%.1fGB, "
                    "total_cores=%d, cap_cores=%d",
                    total_ram_gb, usable_ram_gb, total_cores, usable_cores)

        self._set_process_priority()
        self._enforce_cpu(cpu_fraction, usable_cores, total_cores)
        self._enforce_ram(usable_ram_gb)
        self._enforce_gpu(gpu_fraction)
        self._enforced = True
        logger.info("ResourceEnforcer: hard caps applied")

    def update_caps(self, mode: str):
        """Tighten or relax caps based on governor mode.

        ACTIVE: tight (25% CPU, 50% RAM, no GPU)
        IDLE: relaxed (75% CPU, 75% RAM, GPU allowed)
        SLEEP: minimal (5% CPU, 25% RAM, no GPU)

        Unknown modes fall back to a conservative 50/50/no-GPU cap.
        No-op until enforce() has applied the initial caps.
        """
        if not self._enforced:
            return

        caps = {
            MODE_ACTIVE: (0.25, 0.50, 0.0),
            MODE_IDLE: (0.75, 0.75, 0.75),
            MODE_SLEEP: (0.05, 0.25, 0.0),
        }
        cpu_frac, ram_frac, gpu_frac = caps.get(mode, (0.50, 0.50, 0.0))

        total_ram_gb = self._get_total_ram_gb()
        usable_ram_gb = max(0.5, total_ram_gb * ram_frac - SYSTEM_BUFFER_RAM_GB)
        total_cores = os.cpu_count() or 4

        # NOTE(review): unlike enforce(), the affinity core count here does
        # not subtract SYSTEM_BUFFER_CPU_FRACTION — confirm this asymmetry
        # is intentional (the per-mode fractions are already conservative).
        self._enforce_cpu(cpu_frac, max(1, int(total_cores * cpu_frac)), total_cores)
        self._enforce_ram(usable_ram_gb)
        self._enforce_gpu(gpu_frac)

    # ── Process priority ────────────────────────────────────────────────

    def _set_process_priority(self):
        """Set process to below-normal priority so foreground apps always win."""
        try:
            if sys.platform == 'win32':
                # BELOW_NORMAL_PRIORITY_CLASS = 0x4000
                kernel32 = ctypes.windll.kernel32
                handle = kernel32.GetCurrentProcess()
                kernel32.SetPriorityClass(handle, 0x4000)
                logger.info("ResourceEnforcer: set BELOW_NORMAL priority (Windows)")
            else:
                # os.nice(0) reads the current value without changing it.
                self._original_nice = os.nice(0)
                os.nice(10)  # Lower priority (higher nice = lower priority)
                logger.info("ResourceEnforcer: set nice +10 (POSIX)")
        except Exception as e:
            logger.debug("ResourceEnforcer: priority set failed: %s", e)

    # ── CPU enforcement ─────────────────────────────────────────────────

    def _enforce_cpu(self, cpu_fraction: float, usable_cores: int,
                     total_cores: int):
        """Enforce CPU cap via Job Object (Windows) or cgroup (Linux).

        Also restricts CPU affinity (when psutil is available) to the LAST
        usable_cores cores, leaving the first cores for the OS/foreground.
        """
        if sys.platform == 'win32':
            self._enforce_cpu_windows(cpu_fraction)
        elif sys.platform == 'linux':
            self._enforce_cpu_linux(cpu_fraction, total_cores)
        # macOS: no hard CPU cap, rely on nice priority + psutil affinity

        # CPU affinity: restrict to a subset of cores
        psutil = _try_import_psutil()
        if psutil is not None:
            try:
                p = psutil.Process()
                available = list(range(total_cores))
                # Use last N cores (leave first cores for OS/foreground)
                target = available[-usable_cores:] if usable_cores < total_cores else available
                p.cpu_affinity(target)
                logger.info("ResourceEnforcer: CPU affinity set to cores %s", target)
            except Exception as e:
                # cpu_affinity is unsupported on macOS; best-effort elsewhere.
                logger.debug("ResourceEnforcer: CPU affinity failed: %s", e)

    def _enforce_cpu_windows(self, cpu_fraction: float):
        """Windows: use Job Object CPU rate limit (hard cap on process tree)."""
        try:
            kernel32 = ctypes.windll.kernel32

            # CreateJobObjectW
            job = kernel32.CreateJobObjectW(None, None)
            if not job:
                logger.debug("ResourceEnforcer: CreateJobObject failed")
                return

            # JOBOBJECT_CPU_RATE_CONTROL_INFORMATION
            # Enable CPU rate control with hard cap
            class JOBOBJECT_CPU_RATE_CONTROL_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('ControlFlags', ctypes.c_ulong),
                    ('Value', ctypes.c_ulong),  # Union; we use the CpuRate arm
                ]

            # JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = 0x1
            # JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP = 0x4
            rate_info = JOBOBJECT_CPU_RATE_CONTROL_INFORMATION()
            rate_info.ControlFlags = 0x1 | 0x4  # Enable + Hard cap
            # CpuRate is in units of 1/100th of a percent (100 = 1%);
            # floor at 1% so the process can always make progress.
            rate_info.Value = max(100, int(cpu_fraction * 10000))

            # SetInformationJobObject with JobObjectCpuRateControlInformation (15)
            kernel32.SetInformationJobObject(
                job, 15, ctypes.byref(rate_info), ctypes.sizeof(rate_info))

            # Assign current process (and future children) to the Job Object
            kernel32.AssignProcessToJobObject(
                job, kernel32.GetCurrentProcess())

            self._job_handle = job
            logger.info("ResourceEnforcer: Windows Job Object CPU rate = %.0f%%",
                        cpu_fraction * 100)
        except Exception as e:
            logger.debug("ResourceEnforcer: Windows Job Object failed: %s", e)

    def _enforce_cpu_linux(self, cpu_fraction: float, total_cores: int):
        """Linux: use cgroup v2 cpu.max (needs write access, typically root)."""
        try:
            cg_path = f'/sys/fs/cgroup/nunba_{os.getpid()}'
            # BUGFIX: cgroup.controllers is a FILE, not a directory —
            # os.path.isdir() here was always False, so the cgroup CPU cap
            # was silently never applied on cgroup-v2 systems. Its presence
            # as a file is the canonical marker of a mounted v2 hierarchy.
            if os.path.isfile('/sys/fs/cgroup/cgroup.controllers'):
                os.makedirs(cg_path, exist_ok=True)
                period = 100000  # 100ms scheduling period (cpu.max default)
                quota = int(period * cpu_fraction * total_cores)
                with open(os.path.join(cg_path, 'cpu.max'), 'w') as f:
                    f.write(f'{quota} {period}')
                # Move current process into the cgroup
                with open(os.path.join(cg_path, 'cgroup.procs'), 'w') as f:
                    f.write(str(os.getpid()))
                self._cgroup_path = cg_path
                logger.info("ResourceEnforcer: cgroup v2 cpu.max = %d/%d (%.0f%%)",
                            quota, period, cpu_fraction * 100)
        except PermissionError:
            logger.debug("ResourceEnforcer: cgroup v2 requires root, skipping")
        except Exception as e:
            logger.debug("ResourceEnforcer: cgroup cpu failed: %s", e)

    # ── RAM enforcement ─────────────────────────────────────────────────

    def _enforce_ram(self, max_ram_gb: float):
        """Enforce RAM cap via Job Object (Windows), cgroup (Linux), or RLIMIT."""
        max_bytes = int(max_ram_gb * 1024 * 1024 * 1024)

        if sys.platform == 'win32':
            self._enforce_ram_windows(max_bytes)
        elif sys.platform == 'linux':
            self._enforce_ram_linux(max_bytes)
        else:
            self._enforce_ram_rlimit(max_bytes)

    def _enforce_ram_windows(self, max_bytes: int):
        """Windows: Job Object memory limit (whole job/process tree).

        Requires the Job Object created by _enforce_cpu_windows; no-op if
        that handle is missing.
        """
        if not self._job_handle:
            return
        try:
            kernel32 = ctypes.windll.kernel32

            # JOBOBJECT_EXTENDED_LIMIT_INFORMATION and nested structs.
            # Field order must match the Win32 ABI exactly.
            class IO_COUNTERS(ctypes.Structure):
                _fields_ = [('_' + str(i), ctypes.c_ulonglong) for i in range(6)]

            class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('PerProcessUserTimeLimit', ctypes.c_longlong),
                    ('PerJobUserTimeLimit', ctypes.c_longlong),
                    ('LimitFlags', ctypes.c_ulong),
                    ('MinimumWorkingSetSize', ctypes.c_size_t),
                    ('MaximumWorkingSetSize', ctypes.c_size_t),
                    ('ActiveProcessLimit', ctypes.c_ulong),
                    ('Affinity', ctypes.c_size_t),
                    ('PriorityClass', ctypes.c_ulong),
                    ('SchedulingClass', ctypes.c_ulong),
                ]

            class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('BasicLimitInformation', JOBOBJECT_BASIC_LIMIT_INFORMATION),
                    ('IoInfo', IO_COUNTERS),
                    ('ProcessMemoryLimit', ctypes.c_size_t),
                    ('JobMemoryLimit', ctypes.c_size_t),
                    ('PeakProcessMemoryUsed', ctypes.c_size_t),
                    ('PeakJobMemoryUsed', ctypes.c_size_t),
                ]

            info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
            # JOB_OBJECT_LIMIT_JOB_MEMORY = 0x200
            info.BasicLimitInformation.LimitFlags = 0x200
            info.JobMemoryLimit = max_bytes

            # SetInformationJobObject with JobObjectExtendedLimitInformation (9)
            kernel32.SetInformationJobObject(
                self._job_handle, 9, ctypes.byref(info), ctypes.sizeof(info))

            logger.info("ResourceEnforcer: Windows Job memory limit = %.1f GB",
                        max_bytes / (1024**3))
        except Exception as e:
            logger.debug("ResourceEnforcer: Windows memory limit failed: %s", e)

    def _enforce_ram_linux(self, max_bytes: int):
        """Linux: cgroup v2 memory.max, falling back to RLIMIT_AS."""
        if self._cgroup_path:
            try:
                with open(os.path.join(self._cgroup_path, 'memory.max'), 'w') as f:
                    f.write(str(max_bytes))
                logger.info("ResourceEnforcer: cgroup memory.max = %.1f GB",
                            max_bytes / (1024**3))
                return
            except Exception as e:
                logger.debug("ResourceEnforcer: cgroup memory failed: %s", e)

        self._enforce_ram_rlimit(max_bytes)

    def _enforce_ram_rlimit(self, max_bytes: int):
        """POSIX: RLIMIT_AS (soft cap — process gets MemoryError on exceed)."""
        try:
            import resource
            soft, hard = resource.getrlimit(resource.RLIMIT_AS)
            resource.setrlimit(resource.RLIMIT_AS, (max_bytes, hard))
            logger.info("ResourceEnforcer: RLIMIT_AS = %.1f GB",
                        max_bytes / (1024**3))
        except Exception as e:
            logger.debug("ResourceEnforcer: RLIMIT_AS failed: %s", e)

    # ── GPU enforcement ─────────────────────────────────────────────────

    def _enforce_gpu(self, gpu_fraction: float):
        """Enforce GPU cap via CUDA environment variables.

        CUDA_MPS_ACTIVE_THREAD_PERCENTAGE limits GPU SM utilization
        (only effective when the NVIDIA MPS daemon is in use).
        CUDA_VISIBLE_DEVICES='' hides all GPUs when gpu_fraction <= 0.
        Also reserves SYSTEM_BUFFER_VRAM_GB via the VRAMManager budget.
        """
        if gpu_fraction <= 0:
            # No GPU allowed in this mode
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
            return

        # Floor at 10% SM so GPU work can still make progress.
        pct = max(10, int(gpu_fraction * 100))
        os.environ['CUDA_MPS_ACTIVE_THREAD_PERCENTAGE'] = str(pct)

        # Restore GPU visibility if previously hidden by a no-GPU mode
        if os.environ.get('CUDA_VISIBLE_DEVICES') == '':
            os.environ.pop('CUDA_VISIBLE_DEVICES', None)

        # Set VRAM budget via lifecycle (leaves buffer for other apps).
        # Lazy import: hive/GPU modules must not load at startup.
        try:
            from integrations.service_tools.vram_manager import vram_manager
            info = vram_manager.detect_gpu()
            total_vram = info.get('total_gb', 0)
            if total_vram > 0:
                budget = max(0.5, total_vram * gpu_fraction - SYSTEM_BUFFER_VRAM_GB)
                os.environ['HARTOS_VRAM_BUDGET_GB'] = str(round(budget, 1))
                logger.info("ResourceEnforcer: GPU %d%% SM, VRAM budget %.1fGB "
                            "(total %.1fGB, buffer %.1fGB)",
                            pct, budget, total_vram, SYSTEM_BUFFER_VRAM_GB)
        except Exception:
            pass  # best-effort: no GPU stack available

    # ── Helpers ─────────────────────────────────────────────────────────

    @staticmethod
    def _get_total_ram_gb() -> float:
        """Return total physical RAM in GB (psutil, then Win32, then 8.0)."""
        psutil = _try_import_psutil()
        if psutil:
            try:
                return psutil.virtual_memory().total / (1024**3)
            except Exception:
                pass
        # Windows fallback: GlobalMemoryStatusEx via ctypes
        if sys.platform == 'win32':
            try:
                class MEMORYSTATUSEX(ctypes.Structure):
                    _fields_ = [
                        ('dwLength', ctypes.c_ulong),
                        ('dwMemoryLoad', ctypes.c_ulong),
                        ('ullTotalPhys', ctypes.c_ulonglong),
                        ('ullAvailPhys', ctypes.c_ulonglong),
                        ('ullTotalPageFile', ctypes.c_ulonglong),
                        ('ullAvailPageFile', ctypes.c_ulonglong),
                        ('ullTotalVirtual', ctypes.c_ulonglong),
                        ('ullAvailVirtual', ctypes.c_ulonglong),
                        ('ullAvailExtendedVirtual', ctypes.c_ulonglong),
                    ]
                ms = MEMORYSTATUSEX()
                ms.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
                ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(ms))
                return ms.ullTotalPhys / (1024**3)
            except Exception:
                pass
        return 8.0  # Safe default when detection is impossible

423 

424 

# Module-level enforcer singleton
_enforcer: Optional[ResourceEnforcer] = None


_enforcer_lock = threading.Lock()


def get_enforcer() -> ResourceEnforcer:
    """Return the process-wide ResourceEnforcer, creating it on first use.

    Double-checked locking: the unlocked fast path skips lock overhead
    once the singleton exists; the locked re-check prevents two threads
    from both constructing an instance.
    """
    global _enforcer
    if _enforcer is not None:
        return _enforcer
    with _enforcer_lock:
        if _enforcer is None:
            _enforcer = ResourceEnforcer()
    return _enforcer

440 

441 

442# ═══════════════════════════════════════════════════════════════════════ 

443# Helpers — platform-agnostic resource detection (psutil optional) 

444# ═══════════════════════════════════════════════════════════════════════ 

445 

def _try_import_psutil():
    """Import and return the psutil module, or None when not installed."""
    try:
        import psutil
    except ImportError:
        return None
    return psutil

453 

454 

def _jitter(base_seconds: float, spread: float = 0.5) -> float:
    """Randomize an interval to base_seconds +/- spread*base_seconds.

    Draws uniformly from [base*(1-spread), base*(1+spread)] so many hive
    nodes sharing the same nominal interval do not fire in lockstep
    (thundering-herd avoidance).
    """
    return random.uniform(base_seconds * (1.0 - spread),
                          base_seconds * (1.0 + spread))

463 

464 

465# ═══════════════════════════════════════════════════════════════════════ 

466# ResourceGovernor 

467# ═══════════════════════════════════════════════════════════════════════ 

468 

469class ResourceGovernor: 

470 """Central resource controller for HARTOS. 

471 

472 Monitors CPU, memory, battery, and user activity to transition between 

473 ACTIVE / IDLE / SLEEP modes. Exposes a throttle factor that all HARTOS 

474 subsystems should check before doing heavy work. 

475 

476 When the user is idle, runs a proactive action stream that samples hive 

477 intelligence at randomized intervals. 

478 """ 

479 

480 def __init__(self, idle_threshold_seconds: float = IDLE_THRESHOLD_SECONDS): 

481 # Mode state 

482 self._mode: str = MODE_ACTIVE 

483 self._cpu_limit: float = ACTIVE_CPU_LIMIT 

484 self._gpu_allowed: bool = False 

485 self._last_user_activity: float = time.monotonic() 

486 self._idle_threshold_seconds: float = idle_threshold_seconds 

487 

488 # Threading 

489 self._proactive_thread: Optional[threading.Thread] = None 

490 self._monitor_thread: Optional[threading.Thread] = None 

491 self._running: bool = False 

492 self._lock = threading.Lock() 

493 self._cancel_event = threading.Event() # instant wake/cancel for proactive 

494 

495 # Prime psutil CPU counter (first call always returns 0.0) 

496 try: 

497 import psutil 

498 psutil.cpu_percent(interval=None) 

499 except Exception: 

500 pass 

501 

502 # Stats (exposed for dashboards) 

503 self._stats: dict = { 

504 'mode_changes': 0, 

505 'proactive_actions': 0, 

506 'signals_checked': 0, 

507 'tasks_dispatched': 0, 

508 'models_prefetched': 0, 

509 'benchmarks_run': 0, 

510 'last_mode_change': 0.0, 

511 'uptime_start': 0.0, 

512 } 

513 

514 # ── Lifecycle ───────────────────────────────────────────────── 

515 

    def start(self, defer_memory_limit: bool = False) -> None:
        """Start the governor background monitor and proactive stream.

        Idempotent: a second call while running returns immediately.

        Args:
            defer_memory_limit: If True, apply priority + CPU caps now but
                skip the Job Object / cgroup memory limit. The caller is
                responsible for calling apply_memory_limit() later (after
                the heavy import spike is over). This prevents Windows
                from terminating the process during the boot-time memory
                peak (autogen + flaml + llmlingua + transformers + 96
                expert agents all imported before webview.start).
        """
        # Flip the running flag under the lock; everything after is
        # single-shot because the early return guards re-entry.
        with self._lock:
            if self._running:
                return
            self._running = True
            self._cancel_event.clear()
            self._stats['uptime_start'] = time.time()

        # Apply hard OS-level resource caps at startup
        try:
            enforcer = get_enforcer()
            if defer_memory_limit:
                # Priority + CPU only — memory cap deferred to avoid
                # SIGKILL on boot-time spike (see app.py comment block).
                # Reaches into enforcer internals deliberately: enforce()
                # would also apply the RAM cap we are deferring.
                enforcer._set_process_priority()
                enforcer._enforce_cpu(0.75, max(1, int((os.cpu_count() or 4) * 0.75)), os.cpu_count() or 4)
                enforcer._enforced = True  # mark so update_caps doesn't re-enforce
                logger.info("ResourceEnforcer: priority + CPU applied (memory deferred)")
            else:
                enforcer.enforce(cpu_fraction=0.75, ram_fraction=0.75, gpu_fraction=0.75)
        except Exception as e:
            logger.warning("ResourceEnforcer failed at startup: %s", e)

        # Both workers are daemon threads so they never block interpreter exit.
        self._monitor_thread = threading.Thread(
            target=self._monitor_loop,
            name='ResourceGovernor-Monitor',
            daemon=True,
        )
        self._monitor_thread.start()

        self._proactive_thread = threading.Thread(
            target=self._proactive_action_stream,
            name='ResourceGovernor-Proactive',
            daemon=True,
        )
        self._proactive_thread.start()

        logger.info("ResourceGovernor started (idle threshold=%.0fs)",
                    self._idle_threshold_seconds)

566 

567 def apply_memory_limit(self) -> None: 

568 """Apply the deferred RAM limit — call AFTER boot-time spike is over. 

569 

570 Pairs with start(defer_memory_limit=True). Safe to call multiple 

571 times (idempotent via enforcer._enforce_ram's Job Object update). 

572 """ 

573 try: 

574 enforcer = get_enforcer() 

575 total_ram_gb = enforcer._get_total_ram_gb() 

576 usable_ram_gb = max(0.5, total_ram_gb * 0.75 - SYSTEM_BUFFER_RAM_GB) 

577 enforcer._enforce_ram(usable_ram_gb) 

578 logger.info("ResourceEnforcer: memory cap now active (%.1fGB)", usable_ram_gb) 

579 except Exception as e: 

580 logger.warning("ResourceEnforcer: deferred memory cap failed: %s", e) 

581 

582 def stop(self) -> None: 

583 """Stop the governor, release all throttles.""" 

584 with self._lock: 

585 if not self._running: 

586 return 

587 self._running = False 

588 

589 self._cancel_event.set() 

590 

591 # Wait for threads to exit (bounded timeout) 

592 for t in (self._monitor_thread, self._proactive_thread): 

593 if t is not None and t.is_alive(): 

594 t.join(timeout=3.0) 

595 

596 self._monitor_thread = None 

597 self._proactive_thread = None 

598 logger.info("ResourceGovernor stopped") 

599 

600 # ── Public API ──────────────────────────────────────────────── 

601 

602 def get_mode(self) -> str: 

603 """Current mode: 'active', 'idle', or 'sleep'.""" 

604 return self._mode 

605 

606 def get_throttle(self) -> float: 

607 """Current throttle factor 0.0 (full stop) to 1.0 (unlimited). 

608 

609 Other HARTOS subsystems should multiply their resource usage by 

610 this value before proceeding with heavy work. 

611 """ 

612 return self._calculate_throttle() 

613 

614 def should_allow(self, resource: str) -> bool: 

615 """Quick check: should this resource usage be allowed right now? 

616 

617 Args: 

618 resource: One of 'cpu_heavy', 'gpu', 'network_heavy', 'disk_heavy' 

619 

620 Returns: 

621 True if the governor permits the resource usage. 

622 """ 

623 mode = self._mode 

624 if mode == MODE_SLEEP: 

625 return False 

626 

627 if mode == MODE_ACTIVE: 

628 # Active mode: only permit lightweight work 

629 if resource in ('gpu', 'cpu_heavy', 'disk_heavy'): 

630 return False 

631 if resource == 'network_heavy': 

632 return False 

633 return True 

634 

635 # IDLE mode: allow most things 

636 if resource == 'gpu': 

637 return self._gpu_allowed 

638 return True 

639 

640 def report_user_activity(self) -> None: 

641 """Signal that the user is active. Immediately switches to ACTIVE mode. 

642 

643 Called by UI/input handlers, Flask endpoints, or any user-facing code. 

644 """ 

645 self._last_user_activity = time.monotonic() 

646 if self._mode != MODE_ACTIVE: 

647 self._transition_to(MODE_ACTIVE) 

648 

649 def get_stats(self) -> dict: 

650 """Return a copy of governor statistics for dashboards.""" 

651 with self._lock: 

652 stats = dict(self._stats) 

653 stats['mode'] = self._mode 

654 stats['throttle'] = self._calculate_throttle() 

655 stats['cpu_limit'] = self._cpu_limit 

656 stats['gpu_allowed'] = self._gpu_allowed 

657 return stats 

658 

659 # ── Mode Transitions ────────────────────────────────────────── 

660 

    def _transition_to(self, new_mode: str) -> None:
        """Switch modes, update limits, emit event.

        No-op when new_mode equals the current mode.

        Args:
            new_mode: One of MODE_ACTIVE / MODE_IDLE / MODE_SLEEP.

        Side effects:
            - Updates the advisory _cpu_limit / _gpu_allowed values.
            - Sets _cancel_event (ACTIVE/SLEEP: proactive stream backs off
              instantly) or clears it (IDLE: proactive stream may run).
            - Bumps stats counters under _lock.
            - Tightens/relaxes hard enforcer caps and emits a best-effort
              'resource.mode_changed' EventBus event (lazy import).

        NOTE(review): _mode/_cpu_limit/_gpu_allowed are written without
        holding _lock while readers access them from other threads —
        confirm all callers tolerate that race.
        """
        old_mode = self._mode

        if new_mode == old_mode:
            return

        self._mode = new_mode

        if new_mode == MODE_ACTIVE:
            self._cpu_limit = ACTIVE_CPU_LIMIT
            self._gpu_allowed = False
            # Wake the cancel event so proactive stream backs off instantly
            self._cancel_event.set()
        elif new_mode == MODE_IDLE:
            self._cpu_limit = IDLE_CPU_LIMIT
            self._gpu_allowed = True
            # Clear cancel so proactive stream can run
            self._cancel_event.clear()
        elif new_mode == MODE_SLEEP:
            self._cpu_limit = SLEEP_CPU_LIMIT
            self._gpu_allowed = False
            self._cancel_event.set()

        with self._lock:
            self._stats['mode_changes'] += 1
            self._stats['last_mode_change'] = time.time()

        logger.info("ResourceGovernor: %s -> %s (cpu_limit=%.2f, gpu=%s)",
                    old_mode, new_mode, self._cpu_limit, self._gpu_allowed)

        # Update hard OS-level caps to match new mode
        try:
            get_enforcer().update_caps(new_mode)
        except Exception:
            pass

        # Emit EventBus event (best-effort, lazy import)
        try:
            from core.platform.events import emit_event
            emit_event('resource.mode_changed', {
                'old_mode': old_mode,
                'new_mode': new_mode,
                'cpu_limit': self._cpu_limit,
                'gpu_allowed': self._gpu_allowed,
                'timestamp': time.time(),
            })
        except Exception:
            pass

710 

711 # ── Monitor Loop ────────────────────────────────────────────── 

712 

713 def _monitor_loop(self) -> None: 

714 """Background thread: monitor system state every 5 seconds. 

715 

716 Three cheap checks per tick: CPU, user idle, battery. 

717 Transitions modes based on combined signals. 

718 """ 

719 while self._running: 

720 # Heartbeat to watchdog — prevents false-positive FROZEN alerts 

721 try: 

722 from security.node_watchdog import get_watchdog 

723 _wd = get_watchdog() 

724 if _wd: 

725 _wd.heartbeat('resource_governor_monitor') 

726 except Exception: 

727 pass 

728 try: 

729 cpu = self._get_cpu_usage() 

730 mem = self._get_memory_pressure() 

731 user_idle = self._detect_user_idle() 

732 battery_level, on_battery = self._get_battery_status() 

733 

734 # Decision tree 

735 if on_battery and battery_level < BATTERY_SLEEP_THRESHOLD: 

736 # Critical battery: force sleep 

737 self._transition_to(MODE_SLEEP) 

738 elif not user_idle: 

739 # User is active 

740 self._transition_to(MODE_ACTIVE) 

741 elif cpu > 0.85 or mem > 0.90: 

742 # System is heavily loaded even though user is idle 

743 # (e.g., background renders, compiles) — stay conservative 

744 self._transition_to(MODE_ACTIVE) 

745 elif on_battery and battery_level < BATTERY_THROTTLE_THRESHOLD: 

746 # Low battery: allow idle work but keep it light 

747 self._transition_to(MODE_IDLE) 

748 self._cpu_limit = IDLE_CPU_LIMIT * 0.5 # half the idle budget 

749 else: 

750 # User idle, system not overloaded, power is fine 

751 self._transition_to(MODE_IDLE) 

752 

753 # Update GPU allowance based on VRAM availability 

754 if self._mode == MODE_IDLE: 

755 self._gpu_allowed = self._check_gpu_available() 

756 

757 except Exception as e: 

758 logger.debug("ResourceGovernor monitor error: %s", e) 

759 

760 # Sleep for the monitor interval, but wake early if stopping 

761 self._cancel_event.wait(timeout=_MONITOR_INTERVAL_SECONDS) 

762 if not self._running: 

763 break 

764 # If cancel_event was set by mode transition, clear it for proactive 

765 # (only if we're not in a mode that should keep it set) 

766 if self._mode == MODE_IDLE: 

767 self._cancel_event.clear() 

768 

769 # ── Platform-Specific Detection ─────────────────────────────── 

770 

771 def _detect_user_idle(self) -> bool: 

772 """Detect whether the user is idle (no input for threshold seconds). 

773 

774 Platform-specific: 

775 Windows: GetLastInputInfo via ctypes 

776 Linux: /proc/interrupts delta or fallback 

777 macOS: ioreg idle time 

778 Fallback: report_user_activity() timestamp 

779 """ 

780 idle_ms = self._get_os_idle_ms() 

781 if idle_ms is not None: 

782 return idle_ms >= (self._idle_threshold_seconds * 1000) 

783 

784 # Fallback: use last reported user activity 

785 elapsed = time.monotonic() - self._last_user_activity 

786 return elapsed >= self._idle_threshold_seconds 

787 

788 def _get_os_idle_ms(self) -> Optional[float]: 

789 """Get OS-level user idle time in milliseconds, or None if unavailable.""" 

790 if sys.platform == 'win32': 

791 return self._get_idle_ms_windows() 

792 elif sys.platform == 'linux': 

793 return self._get_idle_ms_linux() 

794 elif sys.platform == 'darwin': 

795 return self._get_idle_ms_macos() 

796 return None 

797 

798 def _get_idle_ms_windows(self) -> Optional[float]: 

799 """Windows: GetLastInputInfo returns tick count of last input event.""" 

800 try: 

801 # LASTINPUTINFO struct: cbSize (UINT), dwTime (DWORD) 

802 class LASTINPUTINFO(ctypes.Structure): 

803 _fields_ = [ 

804 ('cbSize', ctypes.c_uint), 

805 ('dwTime', ctypes.c_uint), 

806 ] 

807 

808 lii = LASTINPUTINFO() 

809 lii.cbSize = ctypes.sizeof(LASTINPUTINFO) 

810 if ctypes.windll.user32.GetLastInputInfo(ctypes.byref(lii)): 

811 current_tick = ctypes.windll.kernel32.GetTickCount() 

812 idle_ms = current_tick - lii.dwTime 

813 # Handle tick count wraparound (every ~49.7 days) 

814 if idle_ms < 0: 

815 idle_ms += 0xFFFFFFFF + 1 

816 return float(idle_ms) 

817 except Exception: 

818 pass 

819 return None 

820 

821 def _get_idle_ms_linux(self) -> Optional[float]: 

822 """Linux: try xprintidle, then /proc/interrupts delta estimation.""" 

823 # Try xprintidle first (X11 desktops) 

824 try: 

825 import subprocess 

826 result = subprocess.run( 

827 ['xprintidle'], capture_output=True, text=True, timeout=2, 

828 ) 

829 if result.returncode == 0: 

830 return float(result.stdout.strip()) 

831 except Exception: 

832 pass 

833 return None 

834 

835 def _get_idle_ms_macos(self) -> Optional[float]: 

836 """macOS: ioreg HIDIdleTime (nanoseconds -> milliseconds).""" 

837 try: 

838 import subprocess 

839 result = subprocess.run( 

840 ['ioreg', '-c', 'IOHIDSystem'], 

841 capture_output=True, text=True, timeout=2, 

842 ) 

843 if result.returncode == 0: 

844 for line in result.stdout.splitlines(): 

845 if 'HIDIdleTime' in line: 

846 # Line format: "HIDIdleTime" = 1234567890 

847 parts = line.split('=') 

848 if len(parts) >= 2: 

849 ns = int(parts[-1].strip()) 

850 return ns / 1_000_000.0 # ns -> ms 

851 except Exception: 

852 pass 

853 return None 

854 

855 # ── CPU / Memory / Battery / GPU ────────────────────────────── 

856 

857 def _get_optimizer_snapshot(self): 

858 """Read the latest cached SystemSnapshot from ComputeOptimizer. 

859 

860 Returns the snap or None when the optimizer module/singleton 

861 isn't available (e.g. degraded boot, or a deployment that 

862 explicitly disables it). ``include_per_process=False`` is 

863 critical here — Governor only needs cpu/ram aggregates, and 

864 forcing the optimizer's process_iter walk on every Governor 

865 tick under sustained pressure would actually INCREASE GIL 

866 contention (the opposite of why this consolidation exists). 

867 

868 Single source of truth for system pressure data — eliminates 

869 the 2026-05-01 parallel-psutil-poll between Governor and 

870 Optimizer that py-spy caught with BOTH monitors active+gil 

871 simultaneously. 

872 """ 

873 try: 

874 from core.compute_optimizer import get_optimizer 

875 return get_optimizer().get_latest_snapshot( 

876 max_age_s=10.0, include_per_process=False, 

877 ) 

878 except Exception: 

879 return None 

880 

881 def _get_cpu_usage(self) -> float: 

882 """Get current CPU usage as a float 0.0 to 1.0. 

883 

884 Read order: 

885 1. ComputeOptimizer's cached snapshot (canonical psutil reader). 

886 2. Direct psutil fallback when optimizer unavailable. 

887 3. Linux ``os.getloadavg()`` when psutil itself is missing. 

888 4. Constant 0.3 as a last-resort moderate-usage assumption. 

889 

890 The first path is the steady-state hot path post-2026-05-01 

891 unification — Governor used to call psutil.cpu_percent itself 

892 every 5s, duplicating ComputeOptimizer's 15s poll. The cached 

893 snap gives us the same data with one writer. 

894 """ 

895 snap = self._get_optimizer_snapshot() 

896 if snap is not None: 

897 return snap.cpu_percent / 100.0 

898 

899 psutil = _try_import_psutil() 

900 if psutil is not None: 

901 try: 

902 return psutil.cpu_percent(interval=None) / 100.0 

903 except Exception: 

904 pass 

905 

906 # Linux fallback: os.getloadavg() 

907 if hasattr(os, 'getloadavg'): 

908 try: 

909 load_1min = os.getloadavg()[0] 

910 cpu_count = os.cpu_count() or 1 

911 return min(1.0, load_1min / cpu_count) 

912 except Exception: 

913 pass 

914 

915 # Windows fallback without psutil: assume moderate usage 

916 return 0.3 

917 

918 def _get_memory_pressure(self) -> float: 

919 """Get memory pressure as a float 0.0 to 1.0. 

920 

921 Read order (mirrors ``_get_cpu_usage`` for symmetry): 

922 1. ComputeOptimizer's cached snapshot (canonical psutil reader). 

923 2. Direct psutil fallback when optimizer unavailable. 

924 3. Linux ``/proc/meminfo`` when psutil itself is missing. 

925 4. Constant 0.4 as a last-resort moderate-usage assumption. 

926 """ 

927 snap = self._get_optimizer_snapshot() 

928 if snap is not None: 

929 return snap.ram_percent / 100.0 

930 

931 psutil = _try_import_psutil() 

932 if psutil is not None: 

933 try: 

934 return psutil.virtual_memory().percent / 100.0 

935 except Exception: 

936 pass 

937 

938 # Linux fallback: /proc/meminfo 

939 if sys.platform == 'linux': 

940 try: 

941 with open('/proc/meminfo', 'r') as f: 

942 mem_info = {} 

943 for line in f: 

944 parts = line.split() 

945 if len(parts) >= 2: 

946 key = parts[0].rstrip(':') 

947 mem_info[key] = int(parts[1]) 

948 total = mem_info.get('MemTotal', 1) 

949 available = mem_info.get('MemAvailable', 

950 mem_info.get('MemFree', total)) 

951 return 1.0 - (available / total) 

952 except Exception: 

953 pass 

954 

955 # Fallback: assume moderate 

956 return 0.4 

957 

958 def _get_battery_status(self) -> tuple: 

959 """Return (battery_level: float 0-1, on_battery: bool). 

960 

961 Returns (1.0, False) if no battery detected (desktop). 

962 """ 

963 psutil = _try_import_psutil() 

964 if psutil is not None: 

965 try: 

966 battery = psutil.sensors_battery() 

967 if battery is not None: 

968 return (battery.percent / 100.0, not battery.power_plugged) 

969 except Exception: 

970 pass 

971 

972 # Windows fallback: GetSystemPowerStatus via ctypes 

973 if sys.platform == 'win32': 

974 try: 

975 class SYSTEM_POWER_STATUS(ctypes.Structure): 

976 _fields_ = [ 

977 ('ACLineStatus', ctypes.c_byte), 

978 ('BatteryFlag', ctypes.c_byte), 

979 ('BatteryLifePercent', ctypes.c_byte), 

980 ('SystemStatusFlag', ctypes.c_byte), 

981 ('BatteryLifeTime', ctypes.c_ulong), 

982 ('BatteryFullLifeTime', ctypes.c_ulong), 

983 ] 

984 

985 status = SYSTEM_POWER_STATUS() 

986 if ctypes.windll.kernel32.GetSystemPowerStatus( 

987 ctypes.byref(status)): 

988 pct = status.BatteryLifePercent 

989 if pct == 255: 

990 # Unknown / no battery 

991 return (1.0, False) 

992 on_battery = (status.ACLineStatus == 0) 

993 return (pct / 100.0, on_battery) 

994 except Exception: 

995 pass 

996 

997 # Linux fallback: /sys/class/power_supply 

998 if sys.platform == 'linux': 

999 try: 

1000 base = '/sys/class/power_supply' 

1001 for entry in os.listdir(base): 

1002 supply_path = os.path.join(base, entry) 

1003 type_path = os.path.join(supply_path, 'type') 

1004 if os.path.exists(type_path): 

1005 with open(type_path) as f: 

1006 if f.read().strip() != 'Battery': 

1007 continue 

1008 cap_path = os.path.join(supply_path, 'capacity') 

1009 status_path = os.path.join(supply_path, 'status') 

1010 if os.path.exists(cap_path): 

1011 with open(cap_path) as f: 

1012 pct = int(f.read().strip()) 

1013 on_battery = True 

1014 if os.path.exists(status_path): 

1015 with open(status_path) as f: 

1016 on_battery = f.read().strip() == 'Discharging' 

1017 return (pct / 100.0, on_battery) 

1018 except Exception: 

1019 pass 

1020 

1021 # No battery detected (desktop) 

1022 return (1.0, False) 

1023 

1024 def _check_gpu_available(self) -> bool: 

1025 """Check if GPU has usable free VRAM for hive work.""" 

1026 try: 

1027 from integrations.service_tools.vram_manager import vram_manager 

1028 info = vram_manager.detect_gpu() 

1029 if not info.get('cuda_available'): 

1030 return False 

1031 free_gb = info.get('free_gb', 0.0) 

1032 # Need at least 1 GB free to be useful for hive tasks 

1033 return free_gb >= 1.0 

1034 except Exception: 

1035 return False 

1036 

1037 # ── Throttle Calculation ────────────────────────────────────── 

1038 

1039 def _calculate_throttle(self) -> float: 

1040 """Combine all signals into a single throttle factor 0.0 - 1.0. 

1041 

1042 ACTIVE mode: 0.05 — bare minimum for event processing 

1043 IDLE + low CPU: 1.0 — full speed 

1044 IDLE + moderate CPU: 0.5 

1045 SLEEP: 0.0 — suspend everything 

1046 Battery < 20%: force 0.0 

1047 """ 

1048 mode = self._mode 

1049 

1050 if mode == MODE_SLEEP: 

1051 return 0.0 

1052 

1053 if mode == MODE_ACTIVE: 

1054 return ACTIVE_CPU_LIMIT # 0.05 

1055 

1056 # IDLE mode — scale based on current resource usage 

1057 cpu = self._get_cpu_usage() 

1058 mem = self._get_memory_pressure() 

1059 

1060 throttle = 1.0 

1061 

1062 if cpu > 0.80: 

1063 throttle *= 0.2 

1064 elif cpu > 0.60: 

1065 throttle *= 0.5 

1066 elif cpu > 0.40: 

1067 throttle *= 0.8 

1068 

1069 if mem > 0.90: 

1070 throttle *= 0.3 

1071 elif mem > 0.80: 

1072 throttle *= 0.6 

1073 

1074 # Battery factor 

1075 battery_level, on_battery = self._get_battery_status() 

1076 if on_battery: 

1077 if battery_level < BATTERY_SLEEP_THRESHOLD: 

1078 return 0.0 

1079 elif battery_level < BATTERY_THROTTLE_THRESHOLD: 

1080 throttle *= 0.5 

1081 

1082 return max(0.0, min(1.0, throttle)) 

1083 

1084 # ── Proactive Action Stream ─────────────────────────────────── 

1085 

1086 def _proactive_action_stream(self) -> None: 

1087 """Run during IDLE mode. Randomized sampling of hive intelligence. 

1088 

1089 Timers: 

1090 - 30-120s: check hive signal feed for actionable items 

1091 - 5-15min: check if pending hive tasks match this node 

1092 - 30-60min: pre-download trending models if VRAM available 

1093 - ~1h: run quick benchmark on active model 

1094 

1095 All timers use random jitter to prevent thundering herd. 

1096 Immediately stops when mode switches to ACTIVE (via _cancel_event). 

1097 """ 

1098 # Initialize next-action timestamps with jitter from now 

1099 now = time.monotonic() 

1100 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL) 

1101 next_task_check = now + _jitter(TASK_CHECK_INTERVAL) 

1102 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL) 

1103 next_benchmark = now + _jitter(BENCHMARK_INTERVAL) 

1104 

1105 while self._running: 

1106 # Heartbeat to watchdog each loop iteration 

1107 try: 

1108 from security.node_watchdog import get_watchdog 

1109 _wd = get_watchdog() 

1110 if _wd: 

1111 _wd.heartbeat('resource_governor_proactive') 

1112 except Exception: 

1113 pass 

1114 # Sleep in short increments, checking cancel event 

1115 # Wait returns True if the event is set (cancel requested) 

1116 cancelled = self._cancel_event.wait(timeout=5.0) 

1117 

1118 if not self._running: 

1119 break 

1120 

1121 # Only do proactive work in IDLE mode 

1122 if self._mode != MODE_IDLE: 

1123 # Reset timers when re-entering idle (with fresh jitter) 

1124 now = time.monotonic() 

1125 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL) 

1126 next_task_check = now + _jitter(TASK_CHECK_INTERVAL) 

1127 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL) 

1128 next_benchmark = now + _jitter(BENCHMARK_INTERVAL) 

1129 continue 

1130 

1131 now = time.monotonic() 

1132 

1133 # Check hive signal feed 

1134 if now >= next_signal_check: 

1135 self._proactive_check_signals() 

1136 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL) 

1137 

1138 # Check pending hive tasks 

1139 if now >= next_task_check: 

1140 self._proactive_check_tasks() 

1141 next_task_check = now + _jitter(TASK_CHECK_INTERVAL) 

1142 

1143 # Pre-download trending models 

1144 if now >= next_model_prefetch: 

1145 self._proactive_prefetch_models() 

1146 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL) 

1147 

1148 # Run benchmark 

1149 if now >= next_benchmark: 

1150 self._proactive_run_benchmark() 

1151 next_benchmark = now + _jitter(BENCHMARK_INTERVAL) 

1152 

1153 def _proactive_check_signals(self) -> None: 

1154 """Check hive signals AND run agentic service discovery.""" 

1155 if self._mode != MODE_IDLE: 

1156 return 

1157 

1158 # Hive signal feed 

1159 try: 

1160 from integrations.channels.hive_signal_bridge import get_signal_bridge 

1161 bridge = get_signal_bridge() 

1162 signals = bridge.get_signal_feed(limit=10) 

1163 with self._lock: 

1164 self._stats['signals_checked'] += 1 

1165 self._stats['proactive_actions'] += 1 

1166 if signals: 

1167 logger.debug("ResourceGovernor: checked %d hive signals", 

1168 len(signals)) 

1169 self._emit_proactive_event('signal_check', { 

1170 'signal_count': len(signals), 

1171 }) 

1172 except Exception as e: 

1173 logger.debug("ResourceGovernor: signal check failed: %s", e) 

1174 

1175 # Agentic service discovery — autonomously find new providers/models 

1176 try: 

1177 from integrations.providers.discovery_agent import get_discovery_agent 

1178 discoveries = get_discovery_agent().run_discovery_cycle() 

1179 if discoveries: 

1180 with self._lock: 

1181 self._stats['proactive_actions'] += 1 

1182 self._emit_proactive_event('service_discovery', { 

1183 'discoveries': len(discoveries), 

1184 }) 

1185 except Exception as e: 

1186 logger.debug("ResourceGovernor: discovery agent failed: %s", e) 

1187 

1188 def _proactive_check_tasks(self) -> None: 

1189 """Check if any pending hive tasks match this node's capabilities.""" 

1190 if self._mode != MODE_IDLE: 

1191 return 

1192 try: 

1193 from integrations.coding_agent.hive_task_protocol import ( 

1194 get_dispatcher, 

1195 ) 

1196 dispatcher = get_dispatcher() 

1197 dispatched = dispatcher.dispatch_pending() 

1198 with self._lock: 

1199 self._stats['tasks_dispatched'] += dispatched 

1200 self._stats['proactive_actions'] += 1 

1201 if dispatched: 

1202 logger.info("ResourceGovernor: dispatched %d hive tasks " 

1203 "during idle", dispatched) 

1204 self._emit_proactive_event('task_dispatch', { 

1205 'dispatched': dispatched, 

1206 }) 

1207 except Exception as e: 

1208 logger.debug("ResourceGovernor: task check failed: %s", e) 

1209 

1210 def _proactive_prefetch_models(self) -> None: 

1211 """Pre-download trending models if GPU VRAM is available.""" 

1212 if self._mode != MODE_IDLE or not self._gpu_allowed: 

1213 return 

1214 try: 

1215 # Check with model lifecycle for prefetch suggestions 

1216 from integrations.service_tools.model_lifecycle import ( 

1217 get_model_lifecycle_manager, 

1218 ) 

1219 mgr = get_model_lifecycle_manager() 

1220 pressure = mgr.get_system_pressure() 

1221 if pressure.get('throttle_factor', 0) < 0.3: 

1222 # System too busy for prefetch 

1223 return 

1224 with self._lock: 

1225 self._stats['models_prefetched'] += 1 

1226 self._stats['proactive_actions'] += 1 

1227 logger.debug("ResourceGovernor: model prefetch check complete") 

1228 self._emit_proactive_event('model_prefetch', { 

1229 'throttle_factor': pressure.get('throttle_factor', 0), 

1230 }) 

1231 except Exception as e: 

1232 logger.debug("ResourceGovernor: model prefetch failed: %s", e) 

1233 

1234 def _proactive_run_benchmark(self) -> None: 

1235 """Run efficiency benchmarks on cloud providers during idle time. 

1236 

1237 Uses the EfficiencyMatrix to benchmark all configured API providers, 

1238 building a continuous picture of speed/quality/cost for optimal routing. 

1239 Also checks local model performance via the model_lifecycle manager. 

1240 """ 

1241 if self._mode != MODE_IDLE: 

1242 return 

1243 

1244 # Check system pressure — don't benchmark if system is busy 

1245 try: 

1246 from integrations.service_tools.model_lifecycle import ( 

1247 get_model_lifecycle_manager, 

1248 ) 

1249 mgr = get_model_lifecycle_manager() 

1250 pressure = mgr.get_system_pressure() 

1251 if pressure.get('throttle_factor', 0) < 0.5: 

1252 return 

1253 except Exception: 

1254 pass 

1255 

1256 # Run provider efficiency benchmarks 

1257 try: 

1258 from integrations.providers.efficiency_matrix import get_matrix 

1259 matrix = get_matrix() 

1260 matrix.run_benchmark(model_type='llm') 

1261 with self._lock: 

1262 self._stats['benchmarks_run'] += 1 

1263 self._stats['proactive_actions'] += 1 

1264 logger.info("ResourceGovernor: provider benchmarks complete") 

1265 self._emit_proactive_event('benchmark', { 

1266 'summary': matrix.get_matrix_summary(), 

1267 }) 

1268 except Exception as e: 

1269 logger.debug("ResourceGovernor: benchmark failed: %s", e) 

1270 

1271 def _emit_proactive_event(self, action: str, data: dict) -> None: 

1272 """Emit a 'resource.proactive_action' event (best-effort).""" 

1273 try: 

1274 from core.platform.events import emit_event 

1275 emit_event('resource.proactive_action', { 

1276 'action': action, 

1277 'mode': self._mode, 

1278 'timestamp': time.time(), 

1279 **data, 

1280 }) 

1281 except Exception: 

1282 pass 

1283 

1284 

1285# ═══════════════════════════════════════════════════════════════════════ 

1286# Singleton 

1287# ═══════════════════════════════════════════════════════════════════════ 

1288 

# Module-level singleton instance; created lazily by get_governor().
_governor: Optional[ResourceGovernor] = None
# Serializes first-time construction of the singleton (double-checked locking).
_governor_lock = threading.Lock()

1291 

1292 

def get_governor() -> ResourceGovernor:
    """Return the singleton ResourceGovernor, creating it on first use.

    Double-checked locking: the unlocked fast-path read is safe under the
    GIL; the lock only serializes first-time construction.
    """
    global _governor
    if _governor is not None:
        return _governor
    with _governor_lock:
        if _governor is None:
            _governor = ResourceGovernor()
    return _governor

1301 

1302 

def should_proceed(resource: str = 'cpu_heavy') -> bool:
    """Module-level convenience: may HARTOS use this resource right now?

    Returns True if the governor allows the requested resource, or if the
    governor has not been started (no throttling by default).

    Args:
        resource: One of 'cpu_heavy', 'gpu', 'network_heavy', 'disk_heavy'

    Usage:
        from core.resource_governor import should_proceed

        if should_proceed('gpu'):
            run_inference()
    """
    gov = _governor
    if gov is not None and gov._running:
        return gov.should_allow(resource)
    return True