Coverage for integrations / agent_engine / exploration_arm.py: 75.0%
64 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""Non-deterministic exploration arm for the auto-evolve loop.
3Closes the search-space axis of the recursive self-improvement loop.
4The baseline pipeline (``autoevolve_code_tools``) generates candidates
5via an LLM hypothesis — deterministic given the prompt. That exploits
6what the LLM already knows but never reaches candidates the LLM hasn't
7considered.
9This module adds an ε-greedy exploration arm: with probability ε a
10candidate is sampled stochastically from a pool weighted by
11AgentAttribution usage priors; with probability 1-ε the caller runs
12the standard exploit (LLM hypothesis) path.
14Feature flag: ``HEVOLVE_RSI_EXPLORE=1`` (off by default). When off,
15``select_strategy()`` always returns ``'exploit'`` and the arm is
16inert. The promoted candidate still passes RSI-1 (Constitutional) +
17RSI-2 (AgentBaselineService delta) gates inside
18``autoevolve_code_tools.commit_improvement`` — exploration is
19ADDITIVE; safety is non-negotiable.
21Tunables:
22 HEVOLVE_RSI_EPSILON (float in [0,1], default 0.1) exploration rate.
23"""
24import logging
25import os
26import random
27from typing import List, Optional, Tuple
# Namespaced logger so operators can raise verbosity for just this arm.
logger = logging.getLogger('hevolve.rsi_explore')

# Fallback exploration rate used when HEVOLVE_RSI_EPSILON is unset or
# cannot be parsed as a float (see module docstring: default 0.1).
_DEFAULT_EPSILON = 0.1
35def _flag_enabled() -> bool:
36 return os.environ.get('HEVOLVE_RSI_EXPLORE', '').lower() in (
37 '1', 'true', 'yes', 'on'
38 )
41def _epsilon() -> float:
42 raw = os.environ.get('HEVOLVE_RSI_EPSILON')
43 if raw is None:
44 return _DEFAULT_EPSILON
45 try:
46 v = float(raw)
47 except (TypeError, ValueError):
48 return _DEFAULT_EPSILON
49 # Clamp — no point in an epsilon outside [0,1].
50 return max(0.0, min(1.0, v))
def select_strategy(rng: Optional[random.Random] = None) -> str:
    """Return ``'explore'`` or ``'exploit'``.

    ``'exploit'`` always when the feature flag is off — callers should
    treat any value other than ``'explore'`` as "use the deterministic
    LLM hypothesis path".
    """
    if not _flag_enabled():
        return 'exploit'
    source = random if rng is None else rng
    if source.random() < _epsilon():
        return 'explore'
    return 'exploit'
def weighted_sample(candidates: List[str],
                    weights: Optional[List[float]] = None,
                    rng: Optional[random.Random] = None) -> Optional[str]:
    """Pick one candidate weighted by ``weights``.

    Falls back to a uniform pick when weights are missing, mismatched
    in length, non-numeric, or sum to zero. Returns ``None`` iff
    ``candidates`` is empty. Pure function — no I/O, no global state.

    Args:
        candidates: pool to sample from; may be empty.
        weights: optional per-candidate weights; negative values are
            treated as zero.
        rng: optional ``random.Random`` for deterministic sampling;
            defaults to the module-level ``random`` generator.
    """
    if not candidates:
        return None
    r = rng if rng is not None else random
    if not weights or len(weights) != len(candidates):
        return r.choice(candidates)
    try:
        non_neg = [max(0.0, float(w)) for w in weights]
    except (TypeError, ValueError):
        # Fix: a non-numeric weight (e.g. a stray string from a bad
        # prior) used to raise here, breaking the documented fail-open
        # contract. Degrade to a uniform pick instead.
        return r.choice(candidates)
    if sum(non_neg) <= 0:
        return r.choice(candidates)
    return r.choices(candidates, weights=non_neg, k=1)[0]
def usage_priors_from_attribution(tool_name: str
                                  ) -> Tuple[List[str], List[float]]:
    """Pull recent-use priors from AgentAttribution for ``tool_name``.

    Returns ``(candidate_keys, weights)``. Fails open to ``([], [])``
    when AgentAttribution is unavailable or returns an unexpected
    shape — the caller then falls back to its own pool or to the
    exploit path.
    """
    empty: Tuple[List[str], List[float]] = ([], [])
    try:
        from integrations.agent_engine.agent_attribution import (
            AgentAttributionOrchestrator,
        )
    except Exception as exc:
        logger.debug('exploration_arm: AgentAttribution unavailable (%s)', exc)
        return empty
    try:
        orchestrator = AgentAttributionOrchestrator()
        snapshot_fn = getattr(orchestrator, 'get_usage_snapshot', None)
        stats = snapshot_fn() if callable(snapshot_fn) else {}
    except Exception as exc:
        logger.debug('exploration_arm: usage snapshot failed (%s)', exc)
        return empty
    # Defensive shape checks — fail open on anything unexpected.
    if not isinstance(stats, dict):
        return empty
    related = stats.get(tool_name) or {}
    if not isinstance(related, dict):
        return empty
    keys: List[str] = []
    weights: List[float] = []
    for key, value in related.items():
        if not key:
            continue
        keys.append(str(key))
        try:
            weight = max(0.0, float(value))
        except (TypeError, ValueError):
            weight = 0.0
        weights.append(weight)
    return keys, weights
def pick_exploration_candidate(
    tool_name: str,
    fallback_candidates: Optional[List[str]] = None,
    rng: Optional[random.Random] = None,
) -> Optional[str]:
    """Sample one mutation key using usage priors.

    Preference order:
    1. AgentAttribution stats for ``tool_name`` (≥ 1 entry).
    2. caller-supplied ``fallback_candidates`` (uniform).
    3. ``None`` — caller MUST fall back to the exploit path.
    """
    pool, priors = usage_priors_from_attribution(tool_name)
    if not pool:
        # No attribution data — fall back to the caller's pool with
        # uniform weighting (``weighted_sample`` treats None that way).
        pool, priors = list(fallback_candidates or []), None
    if not pool:
        return None
    return weighted_sample(pool, priors, rng=rng)
def describe_state() -> dict:
    """Return a small dict for dashboards / diagnostics."""
    state: dict = {}
    state['enabled'] = _flag_enabled()
    state['epsilon'] = _epsilon()
    return state