Coverage for core / resource_governor.py: 57.4%
610 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2Resource Governor — HARTOS never slows down the host OS.
4Three modes:
5 ACTIVE: User is busy -> HARTOS uses <5% CPU, no GPU, minimal RAM
6 IDLE: User is idle -> HARTOS uses up to 50% CPU, available GPU, moderate RAM
7 SLEEP: System on battery/low resources -> HARTOS suspends all non-essential work
9Enforcement layer (ResourceEnforcer):
10 Hard OS-level caps so Nunba CANNOT exceed its budget, regardless of code bugs:
11 - Windows: Job Object with CPU rate limit + memory limit on the process tree
12 - Linux: cgroup v2 cpu.max + memory.max, fallback to RLIMIT_AS
13 - macOS: RLIMIT_AS (soft cap) + process priority
14 - GPU: CUDA_MPS_ACTIVE_THREAD_PERCENTAGE (NVIDIA MPS) or VRAM budget via lifecycle
15 - Process priority: BELOW_NORMAL on Windows, nice +10 on POSIX
17 System buffer: always reserves 25% CPU, 2 GB RAM, 1 GB VRAM for the rest of the OS.
18 Caps tighten/relax on mode transitions (ACTIVE → tight, IDLE → relaxed, SLEEP → minimal).
20Proactive Action Stream:
21 When IDLE, periodically samples hive intelligence:
22 - Check if any hive tasks match this node's capabilities
23 - Pre-download popular models that users nearby are requesting
24 - Run background benchmarks to optimize inference settings
25 - Explore community signal feed for actionable insights
27All sampling is randomized (jitter) to avoid thundering herd across hive nodes.
29Integration:
30 - Uses VRAMManager for GPU detection (vram_manager.detect_gpu())
31 - Reads model_lifecycle get_system_pressure() for existing throttle awareness
32 - Emits EventBus events: 'resource.mode_changed', 'resource.proactive_action'
33 - AgentDaemon and HiveTaskDispatcher check get_throttle() / should_proceed()
34 - Lazy imports for all hive modules (zero startup cost)
36Singleton: get_governor() returns the module-level instance.
37Convenience: should_proceed(resource) for quick checks from any module.
38"""
40import ctypes
41import logging
42import os
43import random
44import struct
45import sys
46import threading
47import time
48from typing import Optional
50logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════════════
# Constants
# ═══════════════════════════════════════════════════════════════════════

# Mode thresholds
IDLE_THRESHOLD_SECONDS = 120       # 2 min of no input -> idle
ACTIVE_CPU_LIMIT = 0.05            # 5% CPU max when user is active
IDLE_CPU_LIMIT = 0.50              # 50% CPU max when idle
SLEEP_CPU_LIMIT = 0.0              # Nothing when sleeping

# ── System buffer — always reserved for the rest of the OS ──
SYSTEM_BUFFER_CPU_FRACTION = 0.25  # Reserve 25% CPU cores for OS
SYSTEM_BUFFER_RAM_GB = 2.0         # Reserve 2 GB RAM for OS
SYSTEM_BUFFER_VRAM_GB = 1.0        # Reserve 1 GB VRAM for other apps

# Proactive action intervals (seconds) — randomized +/-50% via _jitter()
SIGNAL_CHECK_INTERVAL = 60         # Check hive signals
TASK_CHECK_INTERVAL = 600          # Check pending tasks
MODEL_PREFETCH_INTERVAL = 1800     # Pre-download models
BENCHMARK_INTERVAL = 3600          # Run benchmarks

# Battery thresholds (fractions 0.0-1.0 of full charge)
BATTERY_SLEEP_THRESHOLD = 0.20     # Force sleep below 20%
BATTERY_THROTTLE_THRESHOLD = 0.40  # Reduce activity below 40%

# Monitor loop interval (seconds between governor samples)
_MONITOR_INTERVAL_SECONDS = 5

# Mode constants (string values appear in EventBus payloads and logs)
MODE_ACTIVE = 'active'
MODE_IDLE = 'idle'
MODE_SLEEP = 'sleep'
86# ═══════════════════════════════════════════════════════════════════════
87# ResourceEnforcer — hard OS-level process caps
88# ═══════════════════════════════════════════════════════════════════════
class ResourceEnforcer:
    """Enforce hard resource caps at the OS level.

    The governor ADVISES. The enforcer CONSTRAINS.
    Even buggy code cannot exceed the caps set here.

    Platform-specific mechanisms:
        Windows: Job Object (CPU rate limit + memory limit on process tree)
        Linux: cgroup v2 (cpu.max + memory.max), fallback RLIMIT_AS
        macOS: RLIMIT_AS (soft cap) + process priority

    Process priority: BELOW_NORMAL on Windows, nice +10 on POSIX.
    This alone prevents Nunba from competing with foreground apps.
    """

    def __init__(self):
        self._enforced = False       # True once enforce() has run
        self._job_handle = None      # Windows Job Object handle
        self._cgroup_path = None     # Linux cgroup path
        self._original_nice = None   # POSIX nice value before lowering

    def enforce(self, cpu_fraction: float = 0.75, ram_fraction: float = 0.75,
                gpu_fraction: float = 0.75):
        """Apply hard caps. Call once at startup (no-op if already enforced).

        Args:
            cpu_fraction: Max fraction of total CPU Nunba can use (0.75 = 75%)
            ram_fraction: Max fraction of total RAM Nunba can use
            gpu_fraction: Max fraction of GPU Nunba can use (advisory for CUDA)
        """
        if self._enforced:
            return

        # Always subtract system buffer from the requested fraction
        total_ram_gb = self._get_total_ram_gb()
        usable_ram_gb = max(0.5, total_ram_gb * ram_fraction - SYSTEM_BUFFER_RAM_GB)

        total_cores = os.cpu_count() or 4
        usable_cores = max(1, int(total_cores * (cpu_fraction - SYSTEM_BUFFER_CPU_FRACTION)))

        logger.info("ResourceEnforcer: total_ram=%.1fGB, cap=%.1fGB, "
                    "total_cores=%d, cap_cores=%d",
                    total_ram_gb, usable_ram_gb, total_cores, usable_cores)

        self._set_process_priority()
        self._enforce_cpu(cpu_fraction, usable_cores, total_cores)
        self._enforce_ram(usable_ram_gb)
        self._enforce_gpu(gpu_fraction)
        self._enforced = True
        logger.info("ResourceEnforcer: hard caps applied")

    def update_caps(self, mode: str):
        """Tighten or relax caps based on governor mode.

        ACTIVE: tight (25% CPU, 50% RAM, no GPU)
        IDLE: relaxed (75% CPU, 75% RAM, GPU allowed)
        SLEEP: minimal (5% CPU, 25% RAM, no GPU)

        No-op until enforce() (or the deferred-start path) has marked
        the enforcer active.
        """
        if not self._enforced:
            return

        caps = {
            MODE_ACTIVE: (0.25, 0.50, 0.0),
            MODE_IDLE: (0.75, 0.75, 0.75),
            MODE_SLEEP: (0.05, 0.25, 0.0),
        }
        # Unknown mode strings fall back to a conservative middle ground.
        cpu_frac, ram_frac, gpu_frac = caps.get(mode, (0.50, 0.50, 0.0))

        total_ram_gb = self._get_total_ram_gb()
        usable_ram_gb = max(0.5, total_ram_gb * ram_frac - SYSTEM_BUFFER_RAM_GB)
        total_cores = os.cpu_count() or 4

        self._enforce_cpu(cpu_frac, max(1, int(total_cores * cpu_frac)), total_cores)
        self._enforce_ram(usable_ram_gb)
        self._enforce_gpu(gpu_frac)

    # ── Process priority ──────────────────────────────────────────────

    def _set_process_priority(self):
        """Set process to below-normal priority so foreground apps always win."""
        try:
            if sys.platform == 'win32':
                # BELOW_NORMAL_PRIORITY_CLASS = 0x4000
                kernel32 = ctypes.windll.kernel32
                handle = kernel32.GetCurrentProcess()
                kernel32.SetPriorityClass(handle, 0x4000)
                logger.info("ResourceEnforcer: set BELOW_NORMAL priority (Windows)")
            else:
                self._original_nice = os.nice(0)
                os.nice(10)  # Lower priority (higher nice = lower priority)
                logger.info("ResourceEnforcer: set nice +10 (POSIX)")
        except Exception as e:
            logger.debug("ResourceEnforcer: priority set failed: %s", e)

    # ── CPU enforcement ───────────────────────────────────────────────

    def _enforce_cpu(self, cpu_fraction: float, usable_cores: int,
                     total_cores: int):
        """Enforce CPU cap via Job Object (Windows) or cgroup (Linux)."""
        if sys.platform == 'win32':
            self._enforce_cpu_windows(cpu_fraction)
        elif sys.platform == 'linux':
            self._enforce_cpu_linux(cpu_fraction, total_cores)
        # macOS: no hard CPU cap, rely on nice priority + psutil affinity

        # CPU affinity: restrict to subset of cores
        psutil = _try_import_psutil()
        if psutil is not None:
            try:
                p = psutil.Process()
                available = list(range(total_cores))
                # Use last N cores (leave first cores for OS/foreground)
                target = available[-usable_cores:] if usable_cores < total_cores else available
                p.cpu_affinity(target)
                logger.info("ResourceEnforcer: CPU affinity set to cores %s", target)
            except Exception as e:
                logger.debug("ResourceEnforcer: CPU affinity failed: %s", e)

    def _enforce_cpu_windows(self, cpu_fraction: float):
        """Windows: use Job Object CPU rate limit."""
        try:
            kernel32 = ctypes.windll.kernel32

            # CreateJobObjectW
            job = kernel32.CreateJobObjectW(None, None)
            if not job:
                logger.debug("ResourceEnforcer: CreateJobObject failed")
                return

            # JOBOBJECT_CPU_RATE_CONTROL_INFORMATION
            # Enable CPU rate control with hard cap
            class JOBOBJECT_CPU_RATE_CONTROL_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('ControlFlags', ctypes.c_ulong),
                    ('Value', ctypes.c_ulong),  # Union, using CpuRate
                ]

            # JOB_OBJECT_CPU_RATE_CONTROL_ENABLE = 0x1
            # JOB_OBJECT_CPU_RATE_CONTROL_HARD_CAP = 0x4
            rate_info = JOBOBJECT_CPU_RATE_CONTROL_INFORMATION()
            rate_info.ControlFlags = 0x1 | 0x4  # Enable + Hard cap
            # CpuRate is in units of 1/100th of a percent (100 = 1%)
            rate_info.Value = max(100, int(cpu_fraction * 10000))

            # SetInformationJobObject with JobObjectCpuRateControlInformation (15)
            kernel32.SetInformationJobObject(
                job, 15, ctypes.byref(rate_info), ctypes.sizeof(rate_info))

            # Assign current process to the Job Object
            kernel32.AssignProcessToJobObject(
                job, kernel32.GetCurrentProcess())

            self._job_handle = job
            logger.info("ResourceEnforcer: Windows Job Object CPU rate = %.0f%%",
                        cpu_fraction * 100)
        except Exception as e:
            logger.debug("ResourceEnforcer: Windows Job Object failed: %s", e)

    def _enforce_cpu_linux(self, cpu_fraction: float, total_cores: int):
        """Linux: use cgroup v2 cpu.max."""
        try:
            # Try cgroup v2
            cg_path = f'/sys/fs/cgroup/nunba_{os.getpid()}'
            # BUG FIX: cgroup.controllers is a FILE, not a directory — the
            # previous os.path.isdir() check was always False, so the cgroup
            # CPU/RAM caps were silently never applied on Linux.
            if os.path.isfile('/sys/fs/cgroup/cgroup.controllers'):
                os.makedirs(cg_path, exist_ok=True)
                period = 100000  # 100ms
                quota = int(period * cpu_fraction * total_cores)
                with open(os.path.join(cg_path, 'cpu.max'), 'w') as f:
                    f.write(f'{quota} {period}')
                # Move current process into the cgroup
                with open(os.path.join(cg_path, 'cgroup.procs'), 'w') as f:
                    f.write(str(os.getpid()))
                self._cgroup_path = cg_path
                logger.info("ResourceEnforcer: cgroup v2 cpu.max = %d/%d (%.0f%%)",
                            quota, period, cpu_fraction * 100)
        except PermissionError:
            logger.debug("ResourceEnforcer: cgroup v2 requires root, skipping")
        except Exception as e:
            logger.debug("ResourceEnforcer: cgroup cpu failed: %s", e)

    # ── RAM enforcement ───────────────────────────────────────────────

    def _enforce_ram(self, max_ram_gb: float):
        """Enforce RAM cap via Job Object (Windows), cgroup (Linux), or RLIMIT."""
        max_bytes = int(max_ram_gb * 1024 * 1024 * 1024)

        if sys.platform == 'win32':
            self._enforce_ram_windows(max_bytes)
        elif sys.platform == 'linux':
            self._enforce_ram_linux(max_bytes)
        else:
            self._enforce_ram_rlimit(max_bytes)

    def _enforce_ram_windows(self, max_bytes: int):
        """Windows: Job Object memory limit (requires the Job from _enforce_cpu_windows)."""
        if not self._job_handle:
            return
        try:
            kernel32 = ctypes.windll.kernel32

            # JOBOBJECT_EXTENDED_LIMIT_INFORMATION
            class IO_COUNTERS(ctypes.Structure):
                _fields_ = [('_' + str(i), ctypes.c_ulonglong) for i in range(6)]

            class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('PerProcessUserTimeLimit', ctypes.c_longlong),
                    ('PerJobUserTimeLimit', ctypes.c_longlong),
                    ('LimitFlags', ctypes.c_ulong),
                    ('MinimumWorkingSetSize', ctypes.c_size_t),
                    ('MaximumWorkingSetSize', ctypes.c_size_t),
                    ('ActiveProcessLimit', ctypes.c_ulong),
                    ('Affinity', ctypes.c_size_t),
                    ('PriorityClass', ctypes.c_ulong),
                    ('SchedulingClass', ctypes.c_ulong),
                ]

            class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
                _fields_ = [
                    ('BasicLimitInformation', JOBOBJECT_BASIC_LIMIT_INFORMATION),
                    ('IoInfo', IO_COUNTERS),
                    ('ProcessMemoryLimit', ctypes.c_size_t),
                    ('JobMemoryLimit', ctypes.c_size_t),
                    ('PeakProcessMemoryUsed', ctypes.c_size_t),
                    ('PeakJobMemoryUsed', ctypes.c_size_t),
                ]

            info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
            # JOB_OBJECT_LIMIT_JOB_MEMORY = 0x200
            info.BasicLimitInformation.LimitFlags = 0x200
            info.JobMemoryLimit = max_bytes

            # SetInformationJobObject with JobObjectExtendedLimitInformation (9)
            kernel32.SetInformationJobObject(
                self._job_handle, 9, ctypes.byref(info), ctypes.sizeof(info))

            logger.info("ResourceEnforcer: Windows Job memory limit = %.1f GB",
                        max_bytes / (1024**3))
        except Exception as e:
            logger.debug("ResourceEnforcer: Windows memory limit failed: %s", e)

    def _enforce_ram_linux(self, max_bytes: int):
        """Linux: cgroup v2 memory.max, falling back to RLIMIT_AS."""
        if self._cgroup_path:
            try:
                with open(os.path.join(self._cgroup_path, 'memory.max'), 'w') as f:
                    f.write(str(max_bytes))
                logger.info("ResourceEnforcer: cgroup memory.max = %.1f GB",
                            max_bytes / (1024**3))
                return
            except Exception as e:
                logger.debug("ResourceEnforcer: cgroup memory failed: %s", e)

        self._enforce_ram_rlimit(max_bytes)

    def _enforce_ram_rlimit(self, max_bytes: int):
        """POSIX: RLIMIT_AS (soft cap — process gets MemoryError on exceed)."""
        try:
            import resource
            soft, hard = resource.getrlimit(resource.RLIMIT_AS)
            resource.setrlimit(resource.RLIMIT_AS, (max_bytes, hard))
            logger.info("ResourceEnforcer: RLIMIT_AS = %.1f GB",
                        max_bytes / (1024**3))
        except Exception as e:
            logger.debug("ResourceEnforcer: RLIMIT_AS failed: %s", e)

    # ── GPU enforcement ───────────────────────────────────────────────

    def _enforce_gpu(self, gpu_fraction: float):
        """Enforce GPU cap via CUDA environment variables.

        CUDA_MPS_ACTIVE_THREAD_PERCENTAGE limits GPU SM utilization.
        CUDA_VISIBLE_DEVICES can restrict which GPUs are used.
        Also reserves SYSTEM_BUFFER_VRAM_GB via VRAMManager budget.
        """
        if gpu_fraction <= 0:
            # No GPU allowed in this mode
            os.environ['CUDA_VISIBLE_DEVICES'] = ''
            return

        pct = max(10, int(gpu_fraction * 100))
        os.environ['CUDA_MPS_ACTIVE_THREAD_PERCENTAGE'] = str(pct)

        # Restore GPU visibility if previously hidden
        if os.environ.get('CUDA_VISIBLE_DEVICES') == '':
            os.environ.pop('CUDA_VISIBLE_DEVICES', None)

        # Set VRAM budget via lifecycle (leaves buffer for other apps)
        try:
            from integrations.service_tools.vram_manager import vram_manager
            info = vram_manager.detect_gpu()
            total_vram = info.get('total_gb', 0)
            if total_vram > 0:
                budget = max(0.5, total_vram * gpu_fraction - SYSTEM_BUFFER_VRAM_GB)
                os.environ['HARTOS_VRAM_BUDGET_GB'] = str(round(budget, 1))
                logger.info("ResourceEnforcer: GPU %d%% SM, VRAM budget %.1fGB "
                            "(total %.1fGB, buffer %.1fGB)",
                            pct, budget, total_vram, SYSTEM_BUFFER_VRAM_GB)
        except Exception:
            pass

    # ── Helpers ────────────────────────────────────────────────────────

    @staticmethod
    def _get_total_ram_gb() -> float:
        """Return total physical RAM in GB (psutil, then WinAPI, then 8.0)."""
        psutil = _try_import_psutil()
        if psutil:
            try:
                return psutil.virtual_memory().total / (1024**3)
            except Exception:
                pass
        # Windows fallback
        if sys.platform == 'win32':
            try:
                class MEMORYSTATUSEX(ctypes.Structure):
                    _fields_ = [
                        ('dwLength', ctypes.c_ulong),
                        ('dwMemoryLoad', ctypes.c_ulong),
                        ('ullTotalPhys', ctypes.c_ulonglong),
                        ('ullAvailPhys', ctypes.c_ulonglong),
                        ('ullTotalPageFile', ctypes.c_ulonglong),
                        ('ullAvailPageFile', ctypes.c_ulonglong),
                        ('ullTotalVirtual', ctypes.c_ulonglong),
                        ('ullAvailVirtual', ctypes.c_ulonglong),
                        ('ullAvailExtendedVirtual', ctypes.c_ulonglong),
                    ]
                ms = MEMORYSTATUSEX()
                ms.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
                ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(ms))
                return ms.ullTotalPhys / (1024**3)
            except Exception:
                pass
        return 8.0  # Safe default
# Module-level enforcer singleton — lazily created by get_enforcer().
_enforcer: Optional[ResourceEnforcer] = None

# Guards singleton construction (double-checked locking in get_enforcer()).
_enforcer_lock = threading.Lock()
def get_enforcer() -> ResourceEnforcer:
    """Get or create the singleton ResourceEnforcer."""
    global _enforcer
    # Fast path: already constructed, no lock needed.
    if _enforcer is not None:
        return _enforcer
    with _enforcer_lock:
        # Double-checked locking: re-test under the lock so exactly one
        # thread ever constructs the enforcer.
        if _enforcer is None:
            _enforcer = ResourceEnforcer()
    return _enforcer
442# ═══════════════════════════════════════════════════════════════════════
443# Helpers — platform-agnostic resource detection (psutil optional)
444# ═══════════════════════════════════════════════════════════════════════
446def _try_import_psutil():
447 """Return psutil module or None if unavailable."""
448 try:
449 import psutil
450 return psutil
451 except ImportError:
452 return None
455def _jitter(base_seconds: float, spread: float = 0.5) -> float:
456 """Return base_seconds +/- spread*base_seconds (uniform random).
458 Prevents thundering herd when many hive nodes use the same interval.
459 """
460 low = base_seconds * (1.0 - spread)
461 high = base_seconds * (1.0 + spread)
462 return random.uniform(low, high)
465# ═══════════════════════════════════════════════════════════════════════
466# ResourceGovernor
467# ═══════════════════════════════════════════════════════════════════════
469class ResourceGovernor:
470 """Central resource controller for HARTOS.
472 Monitors CPU, memory, battery, and user activity to transition between
473 ACTIVE / IDLE / SLEEP modes. Exposes a throttle factor that all HARTOS
474 subsystems should check before doing heavy work.
476 When the user is idle, runs a proactive action stream that samples hive
477 intelligence at randomized intervals.
478 """
480 def __init__(self, idle_threshold_seconds: float = IDLE_THRESHOLD_SECONDS):
481 # Mode state
482 self._mode: str = MODE_ACTIVE
483 self._cpu_limit: float = ACTIVE_CPU_LIMIT
484 self._gpu_allowed: bool = False
485 self._last_user_activity: float = time.monotonic()
486 self._idle_threshold_seconds: float = idle_threshold_seconds
488 # Threading
489 self._proactive_thread: Optional[threading.Thread] = None
490 self._monitor_thread: Optional[threading.Thread] = None
491 self._running: bool = False
492 self._lock = threading.Lock()
493 self._cancel_event = threading.Event() # instant wake/cancel for proactive
495 # Prime psutil CPU counter (first call always returns 0.0)
496 try:
497 import psutil
498 psutil.cpu_percent(interval=None)
499 except Exception:
500 pass
502 # Stats (exposed for dashboards)
503 self._stats: dict = {
504 'mode_changes': 0,
505 'proactive_actions': 0,
506 'signals_checked': 0,
507 'tasks_dispatched': 0,
508 'models_prefetched': 0,
509 'benchmarks_run': 0,
510 'last_mode_change': 0.0,
511 'uptime_start': 0.0,
512 }
514 # ── Lifecycle ─────────────────────────────────────────────────
    def start(self, defer_memory_limit: bool = False) -> None:
        """Start the governor background monitor and proactive stream.

        Idempotent: a second call while already running returns immediately.
        Also applies the hard OS-level caps via the singleton enforcer
        (best-effort — a failure here only logs a warning).

        Args:
            defer_memory_limit: If True, apply priority + CPU caps now but
                skip the Job Object / cgroup memory limit. The caller is
                responsible for calling apply_memory_limit() later (after
                the heavy import spike is over). This prevents Windows
                from terminating the process during the boot-time memory
                peak (autogen + flaml + llmlingua + transformers + 96
                expert agents all imported before webview.start).
        """
        # Flip the running flag under the lock; everything after runs unlocked
        # so thread startup cannot deadlock against stop()/get_stats().
        with self._lock:
            if self._running:
                return
            self._running = True
            self._cancel_event.clear()
            self._stats['uptime_start'] = time.time()

        # Apply hard OS-level resource caps at startup
        try:
            enforcer = get_enforcer()
            if defer_memory_limit:
                # Priority + CPU only — memory cap deferred to avoid
                # SIGKILL on boot-time spike (see app.py comment block).
                # Calls the enforcer's privates directly so enforce() is
                # bypassed, then marks it enforced so mode transitions
                # (update_caps) still work.
                enforcer._set_process_priority()
                enforcer._enforce_cpu(0.75, max(1, int((os.cpu_count() or 4) * 0.75)), os.cpu_count() or 4)
                enforcer._enforced = True  # mark so update_caps doesn't re-enforce
                logger.info("ResourceEnforcer: priority + CPU applied (memory deferred)")
            else:
                enforcer.enforce(cpu_fraction=0.75, ram_fraction=0.75, gpu_fraction=0.75)
        except Exception as e:
            logger.warning("ResourceEnforcer failed at startup: %s", e)

        # Both workers are daemon threads so they never block interpreter exit.
        self._monitor_thread = threading.Thread(
            target=self._monitor_loop,
            name='ResourceGovernor-Monitor',
            daemon=True,
        )
        self._monitor_thread.start()

        self._proactive_thread = threading.Thread(
            target=self._proactive_action_stream,
            name='ResourceGovernor-Proactive',
            daemon=True,
        )
        self._proactive_thread.start()

        logger.info("ResourceGovernor started (idle threshold=%.0fs)",
                    self._idle_threshold_seconds)
567 def apply_memory_limit(self) -> None:
568 """Apply the deferred RAM limit — call AFTER boot-time spike is over.
570 Pairs with start(defer_memory_limit=True). Safe to call multiple
571 times (idempotent via enforcer._enforce_ram's Job Object update).
572 """
573 try:
574 enforcer = get_enforcer()
575 total_ram_gb = enforcer._get_total_ram_gb()
576 usable_ram_gb = max(0.5, total_ram_gb * 0.75 - SYSTEM_BUFFER_RAM_GB)
577 enforcer._enforce_ram(usable_ram_gb)
578 logger.info("ResourceEnforcer: memory cap now active (%.1fGB)", usable_ram_gb)
579 except Exception as e:
580 logger.warning("ResourceEnforcer: deferred memory cap failed: %s", e)
582 def stop(self) -> None:
583 """Stop the governor, release all throttles."""
584 with self._lock:
585 if not self._running:
586 return
587 self._running = False
589 self._cancel_event.set()
591 # Wait for threads to exit (bounded timeout)
592 for t in (self._monitor_thread, self._proactive_thread):
593 if t is not None and t.is_alive():
594 t.join(timeout=3.0)
596 self._monitor_thread = None
597 self._proactive_thread = None
598 logger.info("ResourceGovernor stopped")
600 # ── Public API ────────────────────────────────────────────────
602 def get_mode(self) -> str:
603 """Current mode: 'active', 'idle', or 'sleep'."""
604 return self._mode
606 def get_throttle(self) -> float:
607 """Current throttle factor 0.0 (full stop) to 1.0 (unlimited).
609 Other HARTOS subsystems should multiply their resource usage by
610 this value before proceeding with heavy work.
611 """
612 return self._calculate_throttle()
614 def should_allow(self, resource: str) -> bool:
615 """Quick check: should this resource usage be allowed right now?
617 Args:
618 resource: One of 'cpu_heavy', 'gpu', 'network_heavy', 'disk_heavy'
620 Returns:
621 True if the governor permits the resource usage.
622 """
623 mode = self._mode
624 if mode == MODE_SLEEP:
625 return False
627 if mode == MODE_ACTIVE:
628 # Active mode: only permit lightweight work
629 if resource in ('gpu', 'cpu_heavy', 'disk_heavy'):
630 return False
631 if resource == 'network_heavy':
632 return False
633 return True
635 # IDLE mode: allow most things
636 if resource == 'gpu':
637 return self._gpu_allowed
638 return True
640 def report_user_activity(self) -> None:
641 """Signal that the user is active. Immediately switches to ACTIVE mode.
643 Called by UI/input handlers, Flask endpoints, or any user-facing code.
644 """
645 self._last_user_activity = time.monotonic()
646 if self._mode != MODE_ACTIVE:
647 self._transition_to(MODE_ACTIVE)
649 def get_stats(self) -> dict:
650 """Return a copy of governor statistics for dashboards."""
651 with self._lock:
652 stats = dict(self._stats)
653 stats['mode'] = self._mode
654 stats['throttle'] = self._calculate_throttle()
655 stats['cpu_limit'] = self._cpu_limit
656 stats['gpu_allowed'] = self._gpu_allowed
657 return stats
659 # ── Mode Transitions ──────────────────────────────────────────
    def _transition_to(self, new_mode: str) -> None:
        """Switch modes, update limits, emit event.

        No-op when new_mode equals the current mode. Otherwise:
        updates _cpu_limit/_gpu_allowed, gates the proactive stream via
        _cancel_event (set for ACTIVE/SLEEP, cleared for IDLE), bumps
        stats, pushes the new mode to the OS-level enforcer, and emits a
        best-effort 'resource.mode_changed' EventBus event.
        """
        old_mode = self._mode

        if new_mode == old_mode:
            return

        # NOTE(review): _mode is written outside self._lock — assumes the
        # GIL-atomic attribute store is sufficient for readers; confirm no
        # reader requires _mode/_cpu_limit to change atomically together.
        self._mode = new_mode

        if new_mode == MODE_ACTIVE:
            self._cpu_limit = ACTIVE_CPU_LIMIT
            self._gpu_allowed = False
            # Wake the cancel event so proactive stream backs off instantly
            self._cancel_event.set()
        elif new_mode == MODE_IDLE:
            self._cpu_limit = IDLE_CPU_LIMIT
            self._gpu_allowed = True
            # Clear cancel so proactive stream can run
            self._cancel_event.clear()
        elif new_mode == MODE_SLEEP:
            self._cpu_limit = SLEEP_CPU_LIMIT
            self._gpu_allowed = False
            self._cancel_event.set()

        # Stats are the only state mutated under the lock here.
        with self._lock:
            self._stats['mode_changes'] += 1
            self._stats['last_mode_change'] = time.time()

        logger.info("ResourceGovernor: %s -> %s (cpu_limit=%.2f, gpu=%s)",
                    old_mode, new_mode, self._cpu_limit, self._gpu_allowed)

        # Update hard OS-level caps to match new mode
        try:
            get_enforcer().update_caps(new_mode)
        except Exception:
            pass

        # Emit EventBus event (best-effort, lazy import)
        try:
            from core.platform.events import emit_event
            emit_event('resource.mode_changed', {
                'old_mode': old_mode,
                'new_mode': new_mode,
                'cpu_limit': self._cpu_limit,
                'gpu_allowed': self._gpu_allowed,
                'timestamp': time.time(),
            })
        except Exception:
            pass
711 # ── Monitor Loop ──────────────────────────────────────────────
713 def _monitor_loop(self) -> None:
714 """Background thread: monitor system state every 5 seconds.
716 Three cheap checks per tick: CPU, user idle, battery.
717 Transitions modes based on combined signals.
718 """
719 while self._running:
720 # Heartbeat to watchdog — prevents false-positive FROZEN alerts
721 try:
722 from security.node_watchdog import get_watchdog
723 _wd = get_watchdog()
724 if _wd:
725 _wd.heartbeat('resource_governor_monitor')
726 except Exception:
727 pass
728 try:
729 cpu = self._get_cpu_usage()
730 mem = self._get_memory_pressure()
731 user_idle = self._detect_user_idle()
732 battery_level, on_battery = self._get_battery_status()
734 # Decision tree
735 if on_battery and battery_level < BATTERY_SLEEP_THRESHOLD:
736 # Critical battery: force sleep
737 self._transition_to(MODE_SLEEP)
738 elif not user_idle:
739 # User is active
740 self._transition_to(MODE_ACTIVE)
741 elif cpu > 0.85 or mem > 0.90:
742 # System is heavily loaded even though user is idle
743 # (e.g., background renders, compiles) — stay conservative
744 self._transition_to(MODE_ACTIVE)
745 elif on_battery and battery_level < BATTERY_THROTTLE_THRESHOLD:
746 # Low battery: allow idle work but keep it light
747 self._transition_to(MODE_IDLE)
748 self._cpu_limit = IDLE_CPU_LIMIT * 0.5 # half the idle budget
749 else:
750 # User idle, system not overloaded, power is fine
751 self._transition_to(MODE_IDLE)
753 # Update GPU allowance based on VRAM availability
754 if self._mode == MODE_IDLE:
755 self._gpu_allowed = self._check_gpu_available()
757 except Exception as e:
758 logger.debug("ResourceGovernor monitor error: %s", e)
760 # Sleep for the monitor interval, but wake early if stopping
761 self._cancel_event.wait(timeout=_MONITOR_INTERVAL_SECONDS)
762 if not self._running:
763 break
764 # If cancel_event was set by mode transition, clear it for proactive
765 # (only if we're not in a mode that should keep it set)
766 if self._mode == MODE_IDLE:
767 self._cancel_event.clear()
769 # ── Platform-Specific Detection ───────────────────────────────
771 def _detect_user_idle(self) -> bool:
772 """Detect whether the user is idle (no input for threshold seconds).
774 Platform-specific:
775 Windows: GetLastInputInfo via ctypes
776 Linux: /proc/interrupts delta or fallback
777 macOS: ioreg idle time
778 Fallback: report_user_activity() timestamp
779 """
780 idle_ms = self._get_os_idle_ms()
781 if idle_ms is not None:
782 return idle_ms >= (self._idle_threshold_seconds * 1000)
784 # Fallback: use last reported user activity
785 elapsed = time.monotonic() - self._last_user_activity
786 return elapsed >= self._idle_threshold_seconds
788 def _get_os_idle_ms(self) -> Optional[float]:
789 """Get OS-level user idle time in milliseconds, or None if unavailable."""
790 if sys.platform == 'win32':
791 return self._get_idle_ms_windows()
792 elif sys.platform == 'linux':
793 return self._get_idle_ms_linux()
794 elif sys.platform == 'darwin':
795 return self._get_idle_ms_macos()
796 return None
798 def _get_idle_ms_windows(self) -> Optional[float]:
799 """Windows: GetLastInputInfo returns tick count of last input event."""
800 try:
801 # LASTINPUTINFO struct: cbSize (UINT), dwTime (DWORD)
802 class LASTINPUTINFO(ctypes.Structure):
803 _fields_ = [
804 ('cbSize', ctypes.c_uint),
805 ('dwTime', ctypes.c_uint),
806 ]
808 lii = LASTINPUTINFO()
809 lii.cbSize = ctypes.sizeof(LASTINPUTINFO)
810 if ctypes.windll.user32.GetLastInputInfo(ctypes.byref(lii)):
811 current_tick = ctypes.windll.kernel32.GetTickCount()
812 idle_ms = current_tick - lii.dwTime
813 # Handle tick count wraparound (every ~49.7 days)
814 if idle_ms < 0:
815 idle_ms += 0xFFFFFFFF + 1
816 return float(idle_ms)
817 except Exception:
818 pass
819 return None
821 def _get_idle_ms_linux(self) -> Optional[float]:
822 """Linux: try xprintidle, then /proc/interrupts delta estimation."""
823 # Try xprintidle first (X11 desktops)
824 try:
825 import subprocess
826 result = subprocess.run(
827 ['xprintidle'], capture_output=True, text=True, timeout=2,
828 )
829 if result.returncode == 0:
830 return float(result.stdout.strip())
831 except Exception:
832 pass
833 return None
835 def _get_idle_ms_macos(self) -> Optional[float]:
836 """macOS: ioreg HIDIdleTime (nanoseconds -> milliseconds)."""
837 try:
838 import subprocess
839 result = subprocess.run(
840 ['ioreg', '-c', 'IOHIDSystem'],
841 capture_output=True, text=True, timeout=2,
842 )
843 if result.returncode == 0:
844 for line in result.stdout.splitlines():
845 if 'HIDIdleTime' in line:
846 # Line format: "HIDIdleTime" = 1234567890
847 parts = line.split('=')
848 if len(parts) >= 2:
849 ns = int(parts[-1].strip())
850 return ns / 1_000_000.0 # ns -> ms
851 except Exception:
852 pass
853 return None
855 # ── CPU / Memory / Battery / GPU ──────────────────────────────
857 def _get_optimizer_snapshot(self):
858 """Read the latest cached SystemSnapshot from ComputeOptimizer.
860 Returns the snap or None when the optimizer module/singleton
861 isn't available (e.g. degraded boot, or a deployment that
862 explicitly disables it). ``include_per_process=False`` is
863 critical here — Governor only needs cpu/ram aggregates, and
864 forcing the optimizer's process_iter walk on every Governor
865 tick under sustained pressure would actually INCREASE GIL
866 contention (the opposite of why this consolidation exists).
868 Single source of truth for system pressure data — eliminates
869 the 2026-05-01 parallel-psutil-poll between Governor and
870 Optimizer that py-spy caught with BOTH monitors active+gil
871 simultaneously.
872 """
873 try:
874 from core.compute_optimizer import get_optimizer
875 return get_optimizer().get_latest_snapshot(
876 max_age_s=10.0, include_per_process=False,
877 )
878 except Exception:
879 return None
881 def _get_cpu_usage(self) -> float:
882 """Get current CPU usage as a float 0.0 to 1.0.
884 Read order:
885 1. ComputeOptimizer's cached snapshot (canonical psutil reader).
886 2. Direct psutil fallback when optimizer unavailable.
887 3. Linux ``os.getloadavg()`` when psutil itself is missing.
888 4. Constant 0.3 as a last-resort moderate-usage assumption.
890 The first path is the steady-state hot path post-2026-05-01
891 unification — Governor used to call psutil.cpu_percent itself
892 every 5s, duplicating ComputeOptimizer's 15s poll. The cached
893 snap gives us the same data with one writer.
894 """
895 snap = self._get_optimizer_snapshot()
896 if snap is not None:
897 return snap.cpu_percent / 100.0
899 psutil = _try_import_psutil()
900 if psutil is not None:
901 try:
902 return psutil.cpu_percent(interval=None) / 100.0
903 except Exception:
904 pass
906 # Linux fallback: os.getloadavg()
907 if hasattr(os, 'getloadavg'):
908 try:
909 load_1min = os.getloadavg()[0]
910 cpu_count = os.cpu_count() or 1
911 return min(1.0, load_1min / cpu_count)
912 except Exception:
913 pass
915 # Windows fallback without psutil: assume moderate usage
916 return 0.3
918 def _get_memory_pressure(self) -> float:
919 """Get memory pressure as a float 0.0 to 1.0.
921 Read order (mirrors ``_get_cpu_usage`` for symmetry):
922 1. ComputeOptimizer's cached snapshot (canonical psutil reader).
923 2. Direct psutil fallback when optimizer unavailable.
924 3. Linux ``/proc/meminfo`` when psutil itself is missing.
925 4. Constant 0.4 as a last-resort moderate-usage assumption.
926 """
927 snap = self._get_optimizer_snapshot()
928 if snap is not None:
929 return snap.ram_percent / 100.0
931 psutil = _try_import_psutil()
932 if psutil is not None:
933 try:
934 return psutil.virtual_memory().percent / 100.0
935 except Exception:
936 pass
938 # Linux fallback: /proc/meminfo
939 if sys.platform == 'linux':
940 try:
941 with open('/proc/meminfo', 'r') as f:
942 mem_info = {}
943 for line in f:
944 parts = line.split()
945 if len(parts) >= 2:
946 key = parts[0].rstrip(':')
947 mem_info[key] = int(parts[1])
948 total = mem_info.get('MemTotal', 1)
949 available = mem_info.get('MemAvailable',
950 mem_info.get('MemFree', total))
951 return 1.0 - (available / total)
952 except Exception:
953 pass
955 # Fallback: assume moderate
956 return 0.4
958 def _get_battery_status(self) -> tuple:
959 """Return (battery_level: float 0-1, on_battery: bool).
961 Returns (1.0, False) if no battery detected (desktop).
962 """
963 psutil = _try_import_psutil()
964 if psutil is not None:
965 try:
966 battery = psutil.sensors_battery()
967 if battery is not None:
968 return (battery.percent / 100.0, not battery.power_plugged)
969 except Exception:
970 pass
972 # Windows fallback: GetSystemPowerStatus via ctypes
973 if sys.platform == 'win32':
974 try:
975 class SYSTEM_POWER_STATUS(ctypes.Structure):
976 _fields_ = [
977 ('ACLineStatus', ctypes.c_byte),
978 ('BatteryFlag', ctypes.c_byte),
979 ('BatteryLifePercent', ctypes.c_byte),
980 ('SystemStatusFlag', ctypes.c_byte),
981 ('BatteryLifeTime', ctypes.c_ulong),
982 ('BatteryFullLifeTime', ctypes.c_ulong),
983 ]
985 status = SYSTEM_POWER_STATUS()
986 if ctypes.windll.kernel32.GetSystemPowerStatus(
987 ctypes.byref(status)):
988 pct = status.BatteryLifePercent
989 if pct == 255:
990 # Unknown / no battery
991 return (1.0, False)
992 on_battery = (status.ACLineStatus == 0)
993 return (pct / 100.0, on_battery)
994 except Exception:
995 pass
997 # Linux fallback: /sys/class/power_supply
998 if sys.platform == 'linux':
999 try:
1000 base = '/sys/class/power_supply'
1001 for entry in os.listdir(base):
1002 supply_path = os.path.join(base, entry)
1003 type_path = os.path.join(supply_path, 'type')
1004 if os.path.exists(type_path):
1005 with open(type_path) as f:
1006 if f.read().strip() != 'Battery':
1007 continue
1008 cap_path = os.path.join(supply_path, 'capacity')
1009 status_path = os.path.join(supply_path, 'status')
1010 if os.path.exists(cap_path):
1011 with open(cap_path) as f:
1012 pct = int(f.read().strip())
1013 on_battery = True
1014 if os.path.exists(status_path):
1015 with open(status_path) as f:
1016 on_battery = f.read().strip() == 'Discharging'
1017 return (pct / 100.0, on_battery)
1018 except Exception:
1019 pass
1021 # No battery detected (desktop)
1022 return (1.0, False)
1024 def _check_gpu_available(self) -> bool:
1025 """Check if GPU has usable free VRAM for hive work."""
1026 try:
1027 from integrations.service_tools.vram_manager import vram_manager
1028 info = vram_manager.detect_gpu()
1029 if not info.get('cuda_available'):
1030 return False
1031 free_gb = info.get('free_gb', 0.0)
1032 # Need at least 1 GB free to be useful for hive tasks
1033 return free_gb >= 1.0
1034 except Exception:
1035 return False
1037 # ── Throttle Calculation ──────────────────────────────────────
1039 def _calculate_throttle(self) -> float:
1040 """Combine all signals into a single throttle factor 0.0 - 1.0.
1042 ACTIVE mode: 0.05 — bare minimum for event processing
1043 IDLE + low CPU: 1.0 — full speed
1044 IDLE + moderate CPU: 0.5
1045 SLEEP: 0.0 — suspend everything
1046 Battery < 20%: force 0.0
1047 """
1048 mode = self._mode
1050 if mode == MODE_SLEEP:
1051 return 0.0
1053 if mode == MODE_ACTIVE:
1054 return ACTIVE_CPU_LIMIT # 0.05
1056 # IDLE mode — scale based on current resource usage
1057 cpu = self._get_cpu_usage()
1058 mem = self._get_memory_pressure()
1060 throttle = 1.0
1062 if cpu > 0.80:
1063 throttle *= 0.2
1064 elif cpu > 0.60:
1065 throttle *= 0.5
1066 elif cpu > 0.40:
1067 throttle *= 0.8
1069 if mem > 0.90:
1070 throttle *= 0.3
1071 elif mem > 0.80:
1072 throttle *= 0.6
1074 # Battery factor
1075 battery_level, on_battery = self._get_battery_status()
1076 if on_battery:
1077 if battery_level < BATTERY_SLEEP_THRESHOLD:
1078 return 0.0
1079 elif battery_level < BATTERY_THROTTLE_THRESHOLD:
1080 throttle *= 0.5
1082 return max(0.0, min(1.0, throttle))
1084 # ── Proactive Action Stream ───────────────────────────────────
1086 def _proactive_action_stream(self) -> None:
1087 """Run during IDLE mode. Randomized sampling of hive intelligence.
1089 Timers:
1090 - 30-120s: check hive signal feed for actionable items
1091 - 5-15min: check if pending hive tasks match this node
1092 - 30-60min: pre-download trending models if VRAM available
1093 - ~1h: run quick benchmark on active model
1095 All timers use random jitter to prevent thundering herd.
1096 Immediately stops when mode switches to ACTIVE (via _cancel_event).
1097 """
1098 # Initialize next-action timestamps with jitter from now
1099 now = time.monotonic()
1100 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL)
1101 next_task_check = now + _jitter(TASK_CHECK_INTERVAL)
1102 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL)
1103 next_benchmark = now + _jitter(BENCHMARK_INTERVAL)
1105 while self._running:
1106 # Heartbeat to watchdog each loop iteration
1107 try:
1108 from security.node_watchdog import get_watchdog
1109 _wd = get_watchdog()
1110 if _wd:
1111 _wd.heartbeat('resource_governor_proactive')
1112 except Exception:
1113 pass
1114 # Sleep in short increments, checking cancel event
1115 # Wait returns True if the event is set (cancel requested)
1116 cancelled = self._cancel_event.wait(timeout=5.0)
1118 if not self._running:
1119 break
1121 # Only do proactive work in IDLE mode
1122 if self._mode != MODE_IDLE:
1123 # Reset timers when re-entering idle (with fresh jitter)
1124 now = time.monotonic()
1125 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL)
1126 next_task_check = now + _jitter(TASK_CHECK_INTERVAL)
1127 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL)
1128 next_benchmark = now + _jitter(BENCHMARK_INTERVAL)
1129 continue
1131 now = time.monotonic()
1133 # Check hive signal feed
1134 if now >= next_signal_check:
1135 self._proactive_check_signals()
1136 next_signal_check = now + _jitter(SIGNAL_CHECK_INTERVAL)
1138 # Check pending hive tasks
1139 if now >= next_task_check:
1140 self._proactive_check_tasks()
1141 next_task_check = now + _jitter(TASK_CHECK_INTERVAL)
1143 # Pre-download trending models
1144 if now >= next_model_prefetch:
1145 self._proactive_prefetch_models()
1146 next_model_prefetch = now + _jitter(MODEL_PREFETCH_INTERVAL)
1148 # Run benchmark
1149 if now >= next_benchmark:
1150 self._proactive_run_benchmark()
1151 next_benchmark = now + _jitter(BENCHMARK_INTERVAL)
1153 def _proactive_check_signals(self) -> None:
1154 """Check hive signals AND run agentic service discovery."""
1155 if self._mode != MODE_IDLE:
1156 return
1158 # Hive signal feed
1159 try:
1160 from integrations.channels.hive_signal_bridge import get_signal_bridge
1161 bridge = get_signal_bridge()
1162 signals = bridge.get_signal_feed(limit=10)
1163 with self._lock:
1164 self._stats['signals_checked'] += 1
1165 self._stats['proactive_actions'] += 1
1166 if signals:
1167 logger.debug("ResourceGovernor: checked %d hive signals",
1168 len(signals))
1169 self._emit_proactive_event('signal_check', {
1170 'signal_count': len(signals),
1171 })
1172 except Exception as e:
1173 logger.debug("ResourceGovernor: signal check failed: %s", e)
1175 # Agentic service discovery — autonomously find new providers/models
1176 try:
1177 from integrations.providers.discovery_agent import get_discovery_agent
1178 discoveries = get_discovery_agent().run_discovery_cycle()
1179 if discoveries:
1180 with self._lock:
1181 self._stats['proactive_actions'] += 1
1182 self._emit_proactive_event('service_discovery', {
1183 'discoveries': len(discoveries),
1184 })
1185 except Exception as e:
1186 logger.debug("ResourceGovernor: discovery agent failed: %s", e)
1188 def _proactive_check_tasks(self) -> None:
1189 """Check if any pending hive tasks match this node's capabilities."""
1190 if self._mode != MODE_IDLE:
1191 return
1192 try:
1193 from integrations.coding_agent.hive_task_protocol import (
1194 get_dispatcher,
1195 )
1196 dispatcher = get_dispatcher()
1197 dispatched = dispatcher.dispatch_pending()
1198 with self._lock:
1199 self._stats['tasks_dispatched'] += dispatched
1200 self._stats['proactive_actions'] += 1
1201 if dispatched:
1202 logger.info("ResourceGovernor: dispatched %d hive tasks "
1203 "during idle", dispatched)
1204 self._emit_proactive_event('task_dispatch', {
1205 'dispatched': dispatched,
1206 })
1207 except Exception as e:
1208 logger.debug("ResourceGovernor: task check failed: %s", e)
1210 def _proactive_prefetch_models(self) -> None:
1211 """Pre-download trending models if GPU VRAM is available."""
1212 if self._mode != MODE_IDLE or not self._gpu_allowed:
1213 return
1214 try:
1215 # Check with model lifecycle for prefetch suggestions
1216 from integrations.service_tools.model_lifecycle import (
1217 get_model_lifecycle_manager,
1218 )
1219 mgr = get_model_lifecycle_manager()
1220 pressure = mgr.get_system_pressure()
1221 if pressure.get('throttle_factor', 0) < 0.3:
1222 # System too busy for prefetch
1223 return
1224 with self._lock:
1225 self._stats['models_prefetched'] += 1
1226 self._stats['proactive_actions'] += 1
1227 logger.debug("ResourceGovernor: model prefetch check complete")
1228 self._emit_proactive_event('model_prefetch', {
1229 'throttle_factor': pressure.get('throttle_factor', 0),
1230 })
1231 except Exception as e:
1232 logger.debug("ResourceGovernor: model prefetch failed: %s", e)
1234 def _proactive_run_benchmark(self) -> None:
1235 """Run efficiency benchmarks on cloud providers during idle time.
1237 Uses the EfficiencyMatrix to benchmark all configured API providers,
1238 building a continuous picture of speed/quality/cost for optimal routing.
1239 Also checks local model performance via the model_lifecycle manager.
1240 """
1241 if self._mode != MODE_IDLE:
1242 return
1244 # Check system pressure — don't benchmark if system is busy
1245 try:
1246 from integrations.service_tools.model_lifecycle import (
1247 get_model_lifecycle_manager,
1248 )
1249 mgr = get_model_lifecycle_manager()
1250 pressure = mgr.get_system_pressure()
1251 if pressure.get('throttle_factor', 0) < 0.5:
1252 return
1253 except Exception:
1254 pass
1256 # Run provider efficiency benchmarks
1257 try:
1258 from integrations.providers.efficiency_matrix import get_matrix
1259 matrix = get_matrix()
1260 matrix.run_benchmark(model_type='llm')
1261 with self._lock:
1262 self._stats['benchmarks_run'] += 1
1263 self._stats['proactive_actions'] += 1
1264 logger.info("ResourceGovernor: provider benchmarks complete")
1265 self._emit_proactive_event('benchmark', {
1266 'summary': matrix.get_matrix_summary(),
1267 })
1268 except Exception as e:
1269 logger.debug("ResourceGovernor: benchmark failed: %s", e)
1271 def _emit_proactive_event(self, action: str, data: dict) -> None:
1272 """Emit a 'resource.proactive_action' event (best-effort)."""
1273 try:
1274 from core.platform.events import emit_event
1275 emit_event('resource.proactive_action', {
1276 'action': action,
1277 'mode': self._mode,
1278 'timestamp': time.time(),
1279 **data,
1280 })
1281 except Exception:
1282 pass
# ═══════════════════════════════════════════════════════════════════════
# Singleton
# ═══════════════════════════════════════════════════════════════════════

# Module-level singleton; created lazily on first get_governor() call.
_governor: Optional[ResourceGovernor] = None
# Guards first-time construction in get_governor() (double-checked locking).
_governor_lock = threading.Lock()
def get_governor() -> ResourceGovernor:
    """Return the singleton ResourceGovernor, creating it on first use."""
    global _governor
    # Fast path: already constructed, no lock needed.
    if _governor is not None:
        return _governor
    with _governor_lock:
        # Double-checked: another thread may have won the race.
        if _governor is None:
            _governor = ResourceGovernor()
    return _governor
def should_proceed(resource: str = 'cpu_heavy') -> bool:
    """Module-level convenience: should HARTOS proceed with this resource usage?

    Returns True if the governor allows the requested resource, or if the
    governor has not been started (no throttling by default).

    Args:
        resource: One of 'cpu_heavy', 'gpu', 'network_heavy', 'disk_heavy'

    Usage:
        from core.resource_governor import should_proceed

        if should_proceed('gpu'):
            run_inference()
    """
    gov = _governor
    if gov is not None and gov._running:
        return gov.should_allow(resource)
    # Governor absent or stopped: never throttle by default.
    return True