Coverage for integrations / agent_engine / exploration_arm.py: 75.0%

64 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

"""Non-deterministic exploration arm for the auto-evolve loop.

Closes the search-space axis of the recursive self-improvement loop.
The baseline pipeline (``autoevolve_code_tools``) generates candidates
via an LLM hypothesis — deterministic given the prompt.  That exploits
what the LLM already knows but never reaches candidates the LLM hasn't
considered.

This module adds an ε-greedy exploration arm: with probability ε a
candidate is sampled stochastically from a pool weighted by
AgentAttribution usage priors; with probability 1-ε the caller runs
the standard exploit (LLM hypothesis) path.

Feature flag: ``HEVOLVE_RSI_EXPLORE=1`` (off by default).  When off,
``select_strategy()`` always returns ``'exploit'`` and the arm is
inert.  The promoted candidate still passes RSI-1 (Constitutional) +
RSI-2 (AgentBaselineService delta) gates inside
``autoevolve_code_tools.commit_improvement`` — exploration is
ADDITIVE; safety is non-negotiable.

Tunables:
    HEVOLVE_RSI_EPSILON  (float in [0,1], default 0.1)  exploration rate.
"""

import logging
import math
import os
import random
from typing import List, Optional, Tuple

28 

# Module logger, registered under the 'hevolve.rsi_explore' name.
logger = logging.getLogger('hevolve.rsi_explore')

# Exploration rate used when HEVOLVE_RSI_EPSILON is unset or unparseable.
_DEFAULT_EPSILON = 0.1

33 

34 

def _flag_enabled() -> bool:
    """Report whether the exploration arm is switched on.

    Reads the ``HEVOLVE_RSI_EXPLORE`` environment variable on every call
    (nothing is cached).  The value is compared case-insensitively, with
    surrounding whitespace ignored, against ``1`` / ``true`` / ``yes`` /
    ``on``.  Any other value, including an unset variable, means disabled.
    """
    raw = os.environ.get('HEVOLVE_RSI_EXPLORE', '')
    # strip() so values such as " 1" or "on\n" (common when the variable
    # comes from an env file) are not silently read as "off".
    return raw.strip().lower() in ('1', 'true', 'yes', 'on')

39 

40 

def _epsilon() -> float:
    """Return the exploration rate ε, clamped to ``[0.0, 1.0]``.

    The rate is read from ``HEVOLVE_RSI_EPSILON`` on every call.
    ``_DEFAULT_EPSILON`` is returned when the variable is unset, cannot be
    parsed as a float, or parses to NaN.  Values outside the unit interval
    (including ``inf`` / ``-inf``) are clamped to the nearest bound.
    """
    raw = os.environ.get('HEVOLVE_RSI_EPSILON')
    if raw is None:
        return _DEFAULT_EPSILON
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return _DEFAULT_EPSILON
    # float() accepts "nan".  NaN fails every comparison, so the clamp below
    # would silently turn it into 1.0 (always explore); treat it like any
    # other unparseable value instead.
    if math.isnan(value):
        return _DEFAULT_EPSILON
    # Clamp — no point in an epsilon outside [0,1].
    return max(0.0, min(1.0, value))

51 

52 

def select_strategy(rng: Optional[random.Random] = None) -> str:
    """Return ``'explore'`` or ``'exploit'`` for the next candidate.

    ``'exploit'`` is always returned when the feature flag is off, and no
    random number is drawn in that case.  Otherwise one uniform draw is
    compared against the configured ε: a draw strictly below ε yields
    ``'explore'``.

    Callers should treat any value other than ``'explore'`` as "use the
    deterministic LLM hypothesis path".

    Args:
        rng: Optional ``random.Random`` to draw from (e.g. a seeded
            instance for reproducible runs).  The module-level ``random``
            generator is used when omitted.
    """
    if not _flag_enabled():
        return 'exploit'
    source = random if rng is None else rng
    return 'explore' if source.random() < _epsilon() else 'exploit'

64 

65 

def weighted_sample(candidates: List[str],
                    weights: Optional[List[float]] = None,
                    rng: Optional[random.Random] = None) -> Optional[str]:
    """Pick one candidate weighted by ``weights``.

    Individual weights that are negative, NaN, or not convertible to a
    float count as zero.  Falls back to a uniform pick when weights are
    missing, mismatched in length, or when their total is not a positive
    finite number (all zero, or an infinite / overflowing sum).  Pure
    function — no I/O, no global state beyond the default RNG.

    Args:
        candidates: Pool to draw from.
        weights: Optional relative weights, parallel to ``candidates``.
        rng: Optional ``random.Random`` to draw from; the module-level
            ``random`` generator is used when omitted.

    Returns:
        The chosen candidate, or ``None`` iff ``candidates`` is empty.
    """
    if not candidates:
        return None
    source = random if rng is None else rng
    if not weights or len(weights) != len(candidates):
        return source.choice(candidates)

    cleaned: List[float] = []
    for raw in weights:
        try:
            value = float(raw)
        except (TypeError, ValueError, OverflowError):
            value = 0.0
        # ``value > 0.0`` is False for NaN as well as for negatives, so
        # both collapse to a zero weight.
        cleaned.append(value if value > 0.0 else 0.0)

    total = sum(cleaned)
    # random.choices() raises ValueError on a zero or non-finite total;
    # degrade to a uniform pick rather than crash the caller.
    if total <= 0.0 or not math.isfinite(total):
        return source.choice(candidates)
    return source.choices(candidates, weights=cleaned, k=1)[0]

84 

85 

def usage_priors_from_attribution(tool_name: str
                                  ) -> Tuple[List[str], List[float]]:
    """Pull recent-use priors from AgentAttribution for ``tool_name``.

    Calls ``AgentAttributionOrchestrator().get_usage_snapshot()`` and reads
    the entry stored under ``tool_name``, which is expected to be a dict of
    ``{candidate_key: numeric_weight}``.
    NOTE(review): the weights are presumably usage counts — confirm against
    AgentAttribution.

    Returns ``(candidate_keys, weights)`` as parallel lists.  Keys are
    stringified and falsy keys are skipped.  Weights are floats clamped to
    be non-negative, and a weight that cannot be converted becomes ``0.0``.

    Fails open to ``([], [])`` when AgentAttribution is unavailable, the
    snapshot call raises, or the result has an unexpected shape — the
    caller then falls back to its own pool or to the exploit path.
    """
    # Imported lazily so this module still loads when AgentAttribution is
    # missing or broken; any import-time failure means "no priors".
    try:
        from integrations.agent_engine.agent_attribution import (
            AgentAttributionOrchestrator,
        )
    except Exception as e:
        logger.debug('exploration_arm: AgentAttribution unavailable (%s)', e)
        return [], []
    try:
        orch = AgentAttributionOrchestrator()
        # Tolerate orchestrators that do not expose get_usage_snapshot.
        snapshot_fn = getattr(orch, 'get_usage_snapshot', None)
        stats = snapshot_fn() if callable(snapshot_fn) else {}
    except Exception as e:
        logger.debug('exploration_arm: usage snapshot failed (%s)', e)
        return [], []
    if not isinstance(stats, dict):
        return [], []
    related = stats.get(tool_name) or {}
    if not isinstance(related, dict):
        return [], []
    keys: List[str] = []
    weights: List[float] = []
    for key, raw_weight in related.items():
        if not key:
            continue
        keys.append(str(key))
        try:
            weights.append(max(0.0, float(raw_weight)))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an int too large for float() must not break
            # the fail-open contract.
            weights.append(0.0)
    return keys, weights

125 

126 

def pick_exploration_candidate(
    tool_name: str,
    fallback_candidates: Optional[List[str]] = None,
    rng: Optional[random.Random] = None,
) -> Optional[str]:
    """Sample one mutation key using usage priors.

    Preference order:
      1. AgentAttribution stats for ``tool_name`` (≥ 1 entry), sampled
         by their weights.
      2. caller-supplied ``fallback_candidates`` (uniform).
      3. ``None`` — caller MUST fall back to the exploit path.

    Args:
        tool_name: Tool whose usage priors are looked up.
        fallback_candidates: Pool used when no priors are available.
        rng: Optional ``random.Random`` forwarded to the sampler; the
            module-level ``random`` generator is used when omitted.
    """
    keys, weights = usage_priors_from_attribution(tool_name)
    if keys:
        return weighted_sample(keys, weights, rng=rng)
    if fallback_candidates:
        # No weights passed, so the pick over the fallback pool is uniform.
        return weighted_sample(fallback_candidates, rng=rng)
    return None

145 

146 

def describe_state() -> dict:
    """Return a small dict for dashboards / diagnostics.

    Keys:
        ``'enabled'``: whether the exploration feature flag is currently on.
        ``'epsilon'``: the exploration rate currently in effect.

    Both values are re-read from the environment on every call.
    """
    return {
        'enabled': _flag_enabled(),
        'epsilon': _epsilon(),
    }