Coverage for integrations/agent_engine/exploration

1"""Non-deterministic exploration arm for the auto-evolve loop.

3Closes the search-space axis of the recursive self-improvement loop.

4The baseline pipeline (``autoevolve_code_tools``) generates candidates

5via an LLM hypothesis — deterministic given the prompt. That exploits

6what the LLM already knows but never reaches candidates the LLM hasn't

7considered.

9This module adds an ε-greedy exploration arm: with probability ε a

10candidate is sampled stochastically from a pool weighted by

11AgentAttribution usage priors; with probability 1-ε the caller runs

12the standard exploit (LLM hypothesis) path.

14Feature flag: ``HEVOLVE_RSI_EXPLORE=1`` (off by default). When off,

15``select_strategy()`` always returns ``'exploit'`` and the arm is

16inert. The promoted candidate still passes RSI-1 (Constitutional) +

17RSI-2 (AgentBaselineService delta) gates inside

18``autoevolve_code_tools.commit_improvement`` — exploration is

19ADDITIVE; safety is non-negotiable.

21Tunables:

22 HEVOLVE_RSI_EPSILON (float in [0,1], default 0.1) exploration rate.

23"""

import logging
import math
import os
import random
from typing import List, Optional, Tuple

# Module-level logger for the exploration arm.
logger = logging.getLogger('hevolve.rsi_explore')

# Exploration rate returned by ``_epsilon()`` when ``HEVOLVE_RSI_EPSILON``
# is unset or cannot be parsed as a float.
_DEFAULT_EPSILON = 0.1

def _flag_enabled() -> bool:
    """Report whether the exploration arm is switched on.

    Reads ``HEVOLVE_RSI_EXPLORE`` on every call. The arm is enabled only
    when the value, compared case-insensitively, is one of ``1``,
    ``true``, ``yes`` or ``on``; an unset variable counts as disabled.
    """
    setting = os.environ.get('HEVOLVE_RSI_EXPLORE', '')
    truthy = ('1', 'true', 'yes', 'on')
    return setting.lower() in truthy

def _epsilon() -> float:
    """Return the exploration rate ε, clamped to ``[0, 1]``.

    Reads ``HEVOLVE_RSI_EPSILON`` on every call. Falls back to
    ``_DEFAULT_EPSILON`` when the variable is unset, cannot be parsed as
    a float, or parses to NaN. Values outside the unit interval
    (including ±inf) are clamped to the nearest bound.
    """
    raw = os.environ.get('HEVOLVE_RSI_EPSILON')
    if raw is None:
        return _DEFAULT_EPSILON
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return _DEFAULT_EPSILON
    # float() happily parses 'nan'. NaN fails every ordering comparison,
    # so the clamp below would turn it into 1.0 (always explore); treat
    # it as an unparseable setting instead.
    if math.isnan(value):
        return _DEFAULT_EPSILON
    # Clamp — no point in an epsilon outside [0,1].
    return max(0.0, min(1.0, value))

def select_strategy(rng: Optional[random.Random] = None) -> str:
    """Choose between the ``'explore'`` and ``'exploit'`` paths.

    With the feature flag off the answer is always ``'exploit'``.
    Otherwise a single draw from ``rng`` (or the module-level ``random``
    generator when ``rng`` is ``None``) is compared against the
    configured epsilon: a draw strictly below it yields ``'explore'``.

    Callers should treat any value other than ``'explore'`` as "use the
    deterministic LLM hypothesis path".
    """
    if _flag_enabled():
        source = random if rng is None else rng
        if source.random() < _epsilon():
            return 'explore'
    return 'exploit'

def weighted_sample(candidates: List[str],
                    weights: Optional[List[float]] = None,
                    rng: Optional[random.Random] = None) -> Optional[str]:
    """Pick one candidate weighted by ``weights``.

    Falls back to a uniform pick when weights are missing, mismatched
    in length, sum to zero, or cannot be turned into a finite positive
    total (an infinite weight, or a sum that overflows). Negative, NaN
    and non-numeric weights count as ``0.0``. Returns ``None`` iff
    ``candidates`` is empty. No I/O; the only state touched is the
    random generator used for the draw.

    Args:
        candidates: Pool to draw from.
        weights: Optional per-candidate weights, parallel to
            ``candidates``.
        rng: Optional generator; the module-level ``random`` functions
            are used when omitted.
    """
    if not candidates:
        return None
    r = rng if rng is not None else random
    if not weights or len(weights) != len(candidates):
        return r.choice(candidates)
    cleaned: List[float] = []
    for w in weights:
        try:
            # max(0.0, x) keeps 0.0 for negatives and for NaN (NaN never
            # compares greater), so only +inf survives as non-finite.
            cleaned.append(max(0.0, float(w)))
        except (TypeError, ValueError, OverflowError):
            # A weight that is not a number carries no preference.
            cleaned.append(0.0)
    total = sum(cleaned)
    # random.choices raises ValueError for a non-positive or non-finite
    # total; honour the documented uniform fallback instead of crashing.
    if total <= 0 or not math.isfinite(total):
        return r.choice(candidates)
    return r.choices(candidates, weights=cleaned, k=1)[0]

def usage_priors_from_attribution(tool_name: str
                                  ) -> Tuple[List[str], List[float]]:
    """Pull recent-use priors from AgentAttribution for ``tool_name``.

    Returns ``(candidate_keys, weights)`` as two parallel lists. Fails
    open to ``([], [])`` when AgentAttribution is unavailable or returns
    an unexpected shape — the caller then falls back to its own pool or
    to the exploit path.

    The usage snapshot is expected to be a dict keyed by tool name whose
    value is a dict of ``candidate key -> numeric usage``. Falsy keys are
    skipped. Every weight is a finite, non-negative float: negative,
    NaN, infinite, non-numeric or overflowing values become ``0.0``.
    """
    # Imported lazily so this module still loads when the attribution
    # package is missing or broken.
    try:
        from integrations.agent_engine.agent_attribution import (
            AgentAttributionOrchestrator,
        )
    except Exception as e:
        logger.debug('exploration_arm: AgentAttribution unavailable (%s)', e)
        return [], []
    try:
        orch = AgentAttributionOrchestrator()
        # The snapshot accessor is optional; treat its absence as "no stats".
        snapshot_fn = getattr(orch, 'get_usage_snapshot', None)
        stats = snapshot_fn() if callable(snapshot_fn) else {}
    except Exception as e:
        logger.debug('exploration_arm: usage snapshot failed (%s)', e)
        return [], []
    if not isinstance(stats, dict):
        return [], []
    related = stats.get(tool_name) or {}
    if not isinstance(related, dict):
        return [], []
    keys: List[str] = []
    weights: List[float] = []
    for k, v in related.items():
        if not k:
            continue
        keys.append(str(k))
        try:
            weight = float(v)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an int too large for a float must not break
            # the fail-open contract.
            weight = 0.0
        # A non-finite prior is unusable for sampling (random.choices
        # rejects a non-finite total), so treat it like any other bad value.
        if not math.isfinite(weight):
            weight = 0.0
        weights.append(max(0.0, weight))
    return keys, weights

125

126

def pick_exploration_candidate(
    tool_name: str,
    fallback_candidates: Optional[List[str]] = None,
    rng: Optional[random.Random] = None,
) -> Optional[str]:
    """Draw a single mutation key for the exploration arm.

    The pool is chosen in this order:
      1. Keys reported by ``usage_priors_from_attribution(tool_name)``,
         sampled with their weights, when at least one key comes back.
      2. ``fallback_candidates`` supplied by the caller, sampled
         uniformly.
      3. Neither available: ``None`` is returned and the caller MUST
         fall back to the exploit path.
    """
    prior_keys, prior_weights = usage_priors_from_attribution(tool_name)
    if prior_keys:
        pool, pool_weights = prior_keys, prior_weights
    elif fallback_candidates:
        # No weights for the caller's pool: weighted_sample picks uniformly.
        pool, pool_weights = fallback_candidates, None
    else:
        return None
    return weighted_sample(pool, pool_weights, rng=rng)

145

146

def describe_state() -> dict:
    """Snapshot the arm's current configuration for dashboards / diagnostics.

    The result has two entries: ``'enabled'`` (whether the feature flag
    is on) and ``'epsilon'`` (the effective exploration rate).
    """
    enabled = _flag_enabled()
    rate = _epsilon()
    return dict(enabled=enabled, epsilon=rate)

Coverage for integrations / agent_engine / exploration_arm.py: 75.0%

64 statements