Coverage for integrations/agent_engine/auto_evolve.py: 62.3%
247 statements
« prev ^ index » next — coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2Auto Evolve Orchestrator — Democratic thought experiment → autonomous iteration.
4Single entry point for the full evolution loop:
51. GATHER — collect eligible thought experiments
62. FILTER — constitutional gate (ConstitutionalFilter)
73. VOTE — tally democratic votes (human + agent, weighted)
84. SELECT — top-N experiments by approval score
95. DISPATCH — route each winner to its type-aware iteration loop
106. TRACK — monitor progress, feed results back to evolution stack
12Triggered by:
13- Admin "Auto Evolve" button
14- Agent tool `start_auto_evolve`
15- Scheduled cron (optional)
17All iteration is agent-native — the AutoEvolveOrchestrator doesn't run
18experiments itself. It selects which experiments DESERVE to run, then
19dispatches them through the existing agent goal system.
20"""
21import json
22import logging
23import threading
24import time
25import uuid
26from concurrent.futures import ThreadPoolExecutor, as_completed
27from dataclasses import dataclass, field
28from typing import Dict, List, Optional
# Module logger; child of the application's 'hevolve' logger hierarchy.
logger = logging.getLogger('hevolve.auto_evolve')

# PRODUCT_MAP §10: DISPATCH uses parallel_dispatch, bounded by this constant.
# Keep conservative — each dispatched experiment spawns an agent goal which may
# fan out further downstream. 4 keeps the blast radius small on flat-tier
# desktops while still honouring the "parallel" contract from the spec.
AUTO_EVOLVE_MAX_PARALLEL_DISPATCH = 4

# PRODUCT_MAP §10: super-majority threshold for the VOTE stage — candidates
# must clear 2/3 of the weighted FOR/AGAINST tally, not a simple majority.
# Applied IN ADDITION to the caller-provided min_approval_score, which acts
# as an absolute-score floor (see _rank_by_votes).
AUTO_EVOLVE_SUPERMAJORITY_RATIO = 2.0 / 3.0

# Active-learning bias for the VOTE stage. When the world model (HevolveAI
# side) reports high prediction error, the system has the most to learn from
# running new experiments — so approval scores are boosted slightly before
# the SELECT sort, biasing selection toward uncertainty-reducing experiments
# (classic active-learning acquisition).
#
# Bounded multiplier: the bias never re-admits candidates the super-majority
# gate already rejected, and the boost is small enough (≤ 1 + AL_MAX_BOOST)
# that a high-quality consensus pick still beats a marginally-passing
# high-uncertainty pick. It is a NUDGE on tie-breaking, not a coup.
AUTO_EVOLVE_AL_MAX_BOOST = 0.25  # +25% upper bound on the AL multiplier

# Trust gate for the AL signal itself. Prediction-error estimates from a
# half-trained world model are unreliable — over-confident on things never
# seen, under-confident on things seen but not generalized. Biasing selection
# with an uncalibrated signal would amplify the model's own blind spots into
# the experiment queue.
#
# Maturity signal: HevolveAI's EmbodiedLearner exposes a `learning_steps`
# counter via get_stats() — the count of learning updates executed. At least
# AL_TRUST_MIN_STEPS are required before avg_prediction_error is trusted as
# an acquisition signal; below that, the multiplier is forced to 1.0 (pure
# democratic vote decides the rank). Above the floor, the boost ramps in
# linearly to full AL_MAX_BOOST at AL_TRUST_FULL_STEPS.
#
# Tuning: 100 = "non-trivial loss surface but not yet generalized";
# 10_000 = "fully matured", prediction error trusted at full weight.
# Both tunable once real-world calibration data exists — kept conservative
# so the bias doesn't fire prematurely.
AUTO_EVOLVE_AL_TRUST_MIN_STEPS = 100
AUTO_EVOLVE_AL_TRUST_FULL_STEPS = 10_000
85def _is_sqlite_backend() -> bool:
86 """Return True when the active DB engine is SQLite (flat tier).
88 FIX-1.4a support: SQLite serializes writes at the file level, so
89 parallel dispatch that commits concurrently produces ``database is
90 locked`` errors. Callers use this to fall back to a single worker
91 and preserve the DISPATCH contract without the race.
93 Detection order:
94 1. Ask SQLAlchemy directly via the shared ``engine`` (authoritative).
95 2. Sniff the ``HEVOLVE_DB_URL`` env var (covers first-boot path
96 where the engine hasn't been created yet).
97 3. Default to ``True`` — SQLite is the flat-tier default, so the
98 safer assumption on failure is "serialize."
99 """
100 try:
101 from integrations.social.models import engine as _engine
102 dialect = getattr(getattr(_engine, 'dialect', None), 'name', '') or ''
103 if dialect:
104 return dialect.lower() == 'sqlite'
105 except Exception:
106 pass
107 try:
108 import os
109 url = os.environ.get('HEVOLVE_DB_URL', '') or ''
110 if url:
111 return url.lower().startswith('sqlite')
112 except Exception:
113 pass
114 return True # flat-tier default = SQLite
@dataclass
class EvolveSession:
    """State record for one auto-evolve cycle.

    Counters are mutated by the orchestrator as the cycle progresses;
    ``status`` walks pending → selecting → dispatching → running →
    completed | failed.
    """
    session_id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])
    status: str = 'pending'  # pending | selecting | dispatching | running | completed | failed
    started_at: float = 0.0
    candidates: int = 0
    filtered: int = 0
    selected: int = 0
    dispatched: int = 0
    completed: int = 0
    failed: int = 0
    experiments: List[Dict] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Serialize the session for status responses and event payloads."""
        # elapsed_s is 0 until the cycle has actually started.
        elapsed = round(time.time() - self.started_at, 1) if self.started_at else 0
        return {
            'session_id': self.session_id,
            'status': self.status,
            'elapsed_s': elapsed,
            'candidates': self.candidates,
            'filtered': self.filtered,
            'selected': self.selected,
            'dispatched': self.dispatched,
            'completed': self.completed,
            'failed': self.failed,
            'experiments': self.experiments,
            'errors': self.errors,
        }
class AutoEvolveOrchestrator:
    """Democratic selection + autonomous iteration dispatch.

    The orchestrator doesn't run experiments — it selects which ones
    to run based on constitutional + democratic criteria, then dispatches
    them through the agent goal system for type-aware iteration.

    Thread model: ``start()`` returns immediately and runs the cycle in
    a daemon thread; ``_dispatch_winners_parallel`` fans out through a
    bounded ThreadPoolExecutor. All mutations of the active session's
    counters are serialized with ``self._lock``.
    """

    def __init__(self):
        # At most one active cycle at a time; guarded by self._lock.
        self._active_session: Optional[EvolveSession] = None
        self._lock = threading.Lock()

    def start(self, max_experiments: int = 5,
              min_approval_score: float = 0.3,
              statuses: Optional[List[str]] = None,
              user_id: str = 'system') -> Dict:
        """Start an auto-evolve cycle.

        Args:
            max_experiments: Max experiments to dispatch in this cycle
            min_approval_score: Minimum weighted approval score to qualify
            statuses: Which experiment statuses to consider
                (default: ['voting', 'evaluating'])
            user_id: Who triggered the evolve cycle

        Returns:
            Session info dict
        """
        # FIX: the "already running?" check and the installation of the
        # new session must happen under ONE lock acquisition. The previous
        # version released the lock between them, so two concurrent
        # start() calls could both pass the check and both install a
        # session (one of them silently orphaned).
        with self._lock:
            if self._active_session and self._active_session.status == 'running':
                return {
                    'success': False,
                    'reason': 'Auto-evolve cycle already running',
                    'session': self._active_session.to_dict(),
                }
            session = EvolveSession()
            session.started_at = time.time()
            session.status = 'selecting'
            self._active_session = session

        # Run in a background daemon thread so the caller (HTTP handler
        # or agent tool) returns immediately.
        def _run():
            try:
                self._execute_cycle(session, max_experiments,
                                    min_approval_score,
                                    statuses or ['voting', 'evaluating'],
                                    user_id)
            except Exception as e:
                session.status = 'failed'
                session.errors.append(str(e))
                logger.exception(f"[{session.session_id}] Auto-evolve failed: {e}")

        t = threading.Thread(target=_run, daemon=True,
                             name=f'auto-evolve-{session.session_id}')
        t.start()

        return {
            'success': True,
            'session_id': session.session_id,
            'status': 'selecting',
        }

    def get_status(self) -> Dict:
        """Get current auto-evolve session status (idle dict when none)."""
        with self._lock:
            if self._active_session:
                return self._active_session.to_dict()
            return {'status': 'idle', 'message': 'No active auto-evolve session'}

    def _execute_cycle(self, session: EvolveSession,
                       max_experiments: int,
                       min_approval_score: float,
                       statuses: List[str],
                       user_id: str):
        """Execute the full auto-evolve cycle (phases 1-5, module doc)."""
        # Phase 1: GATHER candidates
        candidates = self._gather_candidates(session, statuses)
        session.candidates = len(candidates)
        if not candidates:
            session.status = 'completed'
            session.errors.append('No eligible experiments found')
            self._emit_event('auto_evolve.no_candidates', session.to_dict())
            return

        # Phase 2: FILTER through constitutional gate
        approved = self._constitutional_filter(session, candidates)
        session.filtered = len(approved)

        # Phase 3: VOTE tally + rank
        ranked = self._rank_by_votes(session, approved, min_approval_score)
        session.selected = len(ranked)

        if not ranked:
            session.status = 'completed'
            session.errors.append(
                f'No experiments met approval threshold ({min_approval_score})')
            self._emit_event('auto_evolve.none_approved', session.to_dict())
            return

        # Phase 4: SELECT top-N
        winners = ranked[:max_experiments]

        # Phase 5: DISPATCH to type-aware iteration (parallel per PRODUCT_MAP §10)
        session.status = 'dispatching'
        self._emit_event('auto_evolve.dispatching', {
            'count': len(winners),
            'experiments': [w['id'] for w in winners],
        })

        self._dispatch_winners_parallel(session, winners, user_id)

        # Terminal-state rule: at least one successful dispatch keeps the
        # session alive; zero dispatches means the whole cycle failed.
        session.status = 'running' if session.dispatched > 0 else 'failed'
        self._emit_event('auto_evolve.started', session.to_dict())

        logger.info(f"[{session.session_id}] Auto-evolve dispatched "
                    f"{session.dispatched}/{len(winners)} experiments")

    def _gather_candidates(self, session: EvolveSession,
                           statuses: List[str]) -> List[Dict]:
        """Gather eligible thought experiments from the DB.

        Best-effort: any DB/service failure logs a warning and returns
        an empty list, which ends the cycle as 'completed' with no
        candidates rather than failing it.
        """
        try:
            from integrations.social.models import db_session
            from integrations.social.thought_experiment_service import (
                ThoughtExperimentService)

            with db_session(commit=False) as db:
                all_experiments = []
                for status in statuses:
                    exps = ThoughtExperimentService.get_active_experiments(
                        db, status=status, limit=50)
                    all_experiments.extend(exps)
                return all_experiments
        except Exception as e:
            logger.warning(f"[{session.session_id}] Gather failed: {e}")
            return []

    def _constitutional_filter(self, session: EvolveSession,
                               candidates: List[Dict]) -> List[Dict]:
        """Filter candidates through ConstitutionalFilter.

        When the filter module isn't installed, every candidate passes
        through. Non-import errors from check_prompt propagate to the
        cycle's failure handler — a broken filter should fail loud, not
        silently approve.
        """
        try:
            # Hoisted out of the loop: the import is loop-invariant and
            # its ImportError outcome is identical for every candidate.
            from security.hive_guardrails import ConstitutionalFilter
        except ImportError:
            # No filter available — pass through.
            return list(candidates)

        approved = []
        for exp in candidates:
            text = f"{exp.get('title', '')}: {exp.get('hypothesis', '')}"
            check = ConstitutionalFilter.check_prompt(text)
            # check_prompt may return (ok, ...) tuple or a result dict.
            ok = check[0] if isinstance(check, tuple) else check.get('approved', True)
            if ok:
                approved.append(exp)
            else:
                logger.debug(f"[{session.session_id}] Filtered out: {exp.get('id')}")
        return approved

    def _rank_by_votes(self, session: EvolveSession,
                       candidates: List[Dict],
                       min_score: float) -> List[Dict]:
        """Tally votes and rank by approval score.

        Two gates apply per PRODUCT_MAP §10:
        1. weighted_score >= min_score (caller-provided absolute floor)
        2. total_for / (total_for + total_against) >= 2/3 super-majority

        Both must hold for an experiment to qualify for dispatch. The
        super-majority gate protects against a small but highly-weighted
        vocal minority flipping a low-participation tally into approval.

        Once the gates pass, the rank is biased by an active-learning
        signal pulled from the world model (HevolveAI) — see
        _active_learning_multiplier. The bias only nudges order; it
        never re-admits a candidate the gates rejected.
        """
        scored = []
        try:
            from integrations.social.models import db_session
            from integrations.social.thought_experiment_service import (
                ThoughtExperimentService)

            with db_session(commit=False) as db:
                for exp in candidates:
                    tally = ThoughtExperimentService.tally_votes(
                        db, exp['id'])
                    score = tally.get('weighted_score', 0)
                    total_for = tally.get('total_for', 0) or 0
                    total_against = tally.get('total_against', 0) or 0
                    decisive = total_for + total_against
                    # Super-majority: ≥ 2/3 of DECISIVE (non-abstain) weight
                    # must be FOR. Abstains are excluded from denominator.
                    super_ratio = (total_for / decisive) if decisive > 0 else 0.0
                    exp['_approval_score'] = score
                    exp['_super_majority'] = round(super_ratio, 4)
                    exp['_tally'] = tally
                    if (score >= min_score
                            and super_ratio >= AUTO_EVOLVE_SUPERMAJORITY_RATIO):
                        scored.append(exp)
                    else:
                        logger.debug(
                            f"[{session.session_id}] Rejected {exp.get('id')}: "
                            f"score={score} super_ratio={super_ratio:.3f} "
                            f"(need score>={min_score} and "
                            f"ratio>={AUTO_EVOLVE_SUPERMAJORITY_RATIO:.3f})"
                        )
        except Exception as e:
            logger.warning(f"[{session.session_id}] Vote tally failed: {e}")
            # Deliberate best-effort: on tally failure candidates flow
            # through unranked (gates bypassed) rather than stalling the
            # whole cycle.
            return candidates

        # Active-learning bias: a small multiplicative boost derived from
        # the world model's uncertainty signal, applied to each gated-in
        # candidate's approval score before sort. No-op (mult = 1.0) when
        # the world model isn't reachable or is immature.
        al_mult = self._active_learning_multiplier(session)
        for exp in scored:
            base = exp.get('_approval_score', 0) or 0
            exp['_active_learning_multiplier'] = round(al_mult, 4)
            exp['_biased_score'] = round(base * al_mult, 4)

        # Sort by biased score (same order as unbiased when al_mult=1.0).
        scored.sort(
            key=lambda e: e.get('_biased_score',
                                e.get('_approval_score', 0)),
            reverse=True,
        )
        return scored

    def _active_learning_multiplier(self, session: EvolveSession) -> float:
        """Project the world model's (avg_prediction_error, learning_steps)
        into a [1.0, 1 + AUTO_EVOLVE_AL_MAX_BOOST] multiplier, gated on
        model maturity.

        Why colocated with _rank_by_votes (not in baseline_service):
        AL acquisition is a SELECTION concern (which candidates to run
        NEXT), not a SNAPSHOT concern. baseline_service owns the
        COLLECTOR (single source for the HevolveAI feedback shape);
        this method owns the SELECTION USE of that signal.

        Two-stage trust gate:
        1. Signal present? If the collector returns nothing usable,
           return 1.0 (no bias — pure democratic vote decides rank).
        2. Calibrated? Below AL_TRUST_MIN_STEPS learning steps the
           prediction-error estimate is itself untrusted — return 1.0
           (uncertainty about uncertainty would amplify the model's own
           blind spots into the experiment queue). Between MIN and FULL
           steps the boost ramps in linearly; at/above FULL it applies
           at full AL_MAX_BOOST weight.

        Returns 1.0 on any failure path so a missing / immature world
        model falls back to the system's pre-wiring behavior.

        Field names verified against embodied_learner.py::
        EmbodiedLearner.get_stats — do not invent keys
        (`epistemic_uncertainty`, `learning_progress`, etc. do NOT exist
        on get_stats; the real keys are avg_prediction_error and
        learning_steps, surfaced here as al_signal / learning_steps).
        """
        try:
            from integrations.agent_engine.agent_baseline_service import (
                AgentBaselineService)
            wm = AgentBaselineService._collect_world_model_metrics()
        except Exception:
            return 1.0
        if not wm:
            return 1.0

        # Stage 1: AL signal present? Higher prediction error = more to
        # learn from this domain = stronger acquisition value. Already
        # clamped to [0,1] by the collector.
        err = wm.get('al_signal')
        if not isinstance(err, (int, float)):
            return 1.0

        # Stage 2: has the model trained enough that its prediction-
        # error estimate is calibrated?
        steps = wm.get('learning_steps')
        if not isinstance(steps, int) or steps < AUTO_EVOLVE_AL_TRUST_MIN_STEPS:
            logger.debug(
                f"[{session.session_id}] World model still in bootstrap "
                f"(learning_steps={steps} < {AUTO_EVOLVE_AL_TRUST_MIN_STEPS}) "
                f"— skipping AL bias (al_signal={err:.4f} considered uncalibrated)"
            )
            return 1.0

        # Linear ramp from 0 at MIN_STEPS to 1 at FULL_STEPS.
        ramp_span = AUTO_EVOLVE_AL_TRUST_FULL_STEPS - AUTO_EVOLVE_AL_TRUST_MIN_STEPS
        trust_factor = (steps - AUTO_EVOLVE_AL_TRUST_MIN_STEPS) / ramp_span
        trust_factor = max(0.0, min(1.0, trust_factor))

        mult = 1.0 + (err * AUTO_EVOLVE_AL_MAX_BOOST * trust_factor)
        logger.debug(
            f"[{session.session_id}] AL bias active: "
            f"al_signal={err:.4f}, learning_steps={steps}, "
            f"trust_factor={trust_factor:.4f} → mult={mult:.4f}"
        )
        return mult

    def _dispatch_winners_parallel(self, session: EvolveSession,
                                   winners: List[Dict], user_id: str) -> None:
        """Fan-out winning experiments to type-aware iteration in parallel.

        PRODUCT_MAP §10 DISPATCH stage. Uses a bounded ThreadPoolExecutor
        (cap = AUTO_EVOLVE_MAX_PARALLEL_DISPATCH) so a large approved set
        can't stampede the agent goal system. Each experiment still goes
        through _dispatch_experiment (unchanged contract) — this method
        only adds the concurrency primitive.

        Mutations on `session` (dispatched/failed/experiments/errors) are
        serialized by acquiring self._lock around each bookkeeping update.

        FIX-1.4a: On SQLite (flat tier), ``db.commit()`` serializes the
        whole database file, and concurrent commits produce ``database is
        locked`` SQLAlchemy errors. Detect the backend and drop
        ``max_workers`` to 1 on SQLite; MySQL (regional/central) keeps
        the bounded parallel dispatch.

        FIX-1.4b: ``_dispatch_experiment`` returns ``{'success': False,
        'reason': ...}`` on logical failure (already-dispatched
        experiment, missing goal recipe) WITHOUT raising. Earlier code
        counted those as successful dispatches, leaving a 100%-failed
        session stuck at status='running' forever. Branch on the
        ``success`` flag: count dispatched only on True, else route to
        the ``failed`` counter + errors list so the terminal-state rule
        in _execute_cycle (``running if dispatched > 0 else failed``)
        fires correctly.
        """
        if not winners:
            return

        max_parallel = AUTO_EVOLVE_MAX_PARALLEL_DISPATCH
        if _is_sqlite_backend():
            max_parallel = 1  # FIX-1.4a: serialize on flat/SQLite
            logger.debug(
                f"[{session.session_id}] SQLite backend detected — "
                f"serializing dispatch to avoid 'database is locked'")

        max_workers = min(len(winners), max_parallel)
        with ThreadPoolExecutor(
                max_workers=max_workers,
                thread_name_prefix=f'auto-evolve-{session.session_id}') as pool:
            futures = {
                pool.submit(self._dispatch_experiment, session, exp, user_id): exp
                for exp in winners
            }
            for future in as_completed(futures):
                exp = futures[future]
                try:
                    goal_result = future.result()
                    # FIX-1.4b: a missing/falsy 'success' key (or a
                    # non-dict result) is a logical failure, not a dispatch.
                    success = (
                        isinstance(goal_result, dict)
                        and bool(goal_result.get('success'))
                    )
                    with self._lock:
                        if success:
                            session.dispatched += 1
                            session.experiments.append({
                                'id': exp['id'],
                                'title': exp.get('title', ''),
                                'type': exp.get('experiment_type', 'traditional'),
                                'approval_score': exp.get('_approval_score', 0),
                                'super_majority': exp.get('_super_majority', 0),
                                # success implies goal_result is a dict.
                                'goal_id': goal_result.get('goal_id'),
                                'status': 'dispatched',
                            })
                        else:
                            session.failed += 1
                            reason = (
                                goal_result.get('reason')
                                if isinstance(goal_result, dict)
                                else 'non-dict result'
                            ) or 'unknown'
                            session.errors.append(
                                f"Dispatch {exp['id']}: {reason}")
                            session.experiments.append({
                                'id': exp['id'],
                                'title': exp.get('title', ''),
                                'type': exp.get('experiment_type', 'traditional'),
                                'approval_score': exp.get('_approval_score', 0),
                                'super_majority': exp.get('_super_majority', 0),
                                'goal_id': None,
                                'status': 'failed',
                                'reason': reason,
                            })
                            logger.info(
                                f"[{session.session_id}] Dispatch declined "
                                f"for {exp['id']}: {reason}")
                except Exception as e:
                    with self._lock:
                        session.failed += 1
                        session.errors.append(f"Dispatch {exp['id']}: {e}")
                    logger.warning(
                        f"[{session.session_id}] Failed to dispatch "
                        f"{exp['id']}: {e}")

    def _dispatch_experiment(self, session: EvolveSession,
                             exp: Dict, user_id: str) -> Dict:
        """Dispatch a winning experiment to its type-aware iteration loop.

        Uses ThoughtExperimentService.request_agent_evaluation() which
        creates an agent goal with the type-aware iteration recipe.
        Commits only on logical success so a declined dispatch leaves no
        partial DB state.

        NOTE(review): a previous docstring described a special
        'code_evolution'/hevolveai dispatch path; this implementation
        contains no such branch — confirm whether it lives downstream in
        request_agent_evaluation or was never wired up.
        """
        from integrations.social.models import db_session
        from integrations.social.thought_experiment_service import (
            ThoughtExperimentService)

        with db_session(commit=False) as db:
            result = ThoughtExperimentService.request_agent_evaluation(
                db, exp['id'])
            if result.get('success'):
                db.commit()
            return result

    def _emit_event(self, topic: str, data: Dict):
        """Emit a progress event via the EventBus (best-effort, never raises)."""
        try:
            from core.platform.events import emit_event
            emit_event(topic, data)
        except Exception:
            pass
# ── Singleton ────────────────────────────────────────────────

_orchestrator: Optional[AutoEvolveOrchestrator] = None
_lock = threading.Lock()


def get_auto_evolve_orchestrator() -> AutoEvolveOrchestrator:
    """Get or create the singleton AutoEvolveOrchestrator.

    Double-checked locking: the unlocked fast path avoids contention
    once the singleton exists; the re-check under the lock prevents two
    racing callers from each constructing an instance.
    """
    global _orchestrator
    if _orchestrator is not None:
        return _orchestrator
    with _lock:
        if _orchestrator is None:
            _orchestrator = AutoEvolveOrchestrator()
    return _orchestrator
# ── Owner Pause/Resume ───────────────────────────────────────

# Paused experiment IDs — the owner can pause their experiment's iteration.
# Maps experiment_id → user_id of the pauser; all access goes through
# _pause_lock so the pause/resume tool functions are thread-safe.
_paused_experiments: Dict[str, str] = {}  # experiment_id → paused_by_user_id
_pause_lock = threading.Lock()
def pause_experiment_evolution(experiment_id: str, user_id: str) -> Dict:
    """Pause a running experiment's iteration (owner only).

    The experiment stays in 'evaluating' status but the agent goal
    is signalled to stop iterating.
    """
    # Ownership gate: only the creator may pause. Any failure here
    # (missing services, DB error) is reported as a failed result
    # instead of raising.
    try:
        from integrations.social.models import db_session
        from integrations.social.thought_experiment_service import ThoughtExperimentService
        with db_session(commit=False) as db:
            record = ThoughtExperimentService.get_experiment_detail(
                db, experiment_id)
            if not record:
                return {'success': False, 'reason': 'not_found'}
            if record.get('creator_id') != user_id:
                return {'success': False, 'reason': 'not_owner',
                        'message': 'Only the experiment creator can pause it'}
    except Exception as e:
        return {'success': False, 'reason': str(e)}

    with _pause_lock:
        _paused_experiments[experiment_id] = user_id

    logger.info(f"Experiment {experiment_id} paused by {user_id}")
    return {'success': True, 'experiment_id': experiment_id, 'status': 'paused'}
def resume_experiment_evolution(experiment_id: str, user_id: str) -> Dict:
    """Resume a paused experiment's iteration (owner only)."""
    with _pause_lock:
        pauser = _paused_experiments.get(experiment_id)
        # Not paused at all (or recorded with a falsy pauser id).
        if not pauser:
            return {'success': False, 'reason': 'not_paused'}
        # Only the original pauser may lift the pause.
        if pauser != user_id:
            return {'success': False, 'reason': 'not_owner',
                    'message': 'Only the user who paused can resume'}
        _paused_experiments.pop(experiment_id)

    logger.info(f"Experiment {experiment_id} resumed by {user_id}")
    return {'success': True, 'experiment_id': experiment_id, 'status': 'resumed'}
def is_experiment_paused(experiment_id: str) -> bool:
    """Return True when the experiment's evolution is currently paused."""
    with _pause_lock:
        paused = experiment_id in _paused_experiments
    return paused
def get_paused_experiments() -> List[str]:
    """Return the list of currently paused experiment IDs."""
    with _pause_lock:
        snapshot = list(_paused_experiments)
    return snapshot
# ── Agent Tool Functions ─────────────────────────────────────

def start_auto_evolve(max_experiments: int = 5,
                      min_approval_score: float = 0.3,
                      user_id: str = 'system') -> str:
    """Start an auto-evolve cycle: democratically select thought experiments
    and dispatch them to autonomous iteration loops.

    The orchestrator:
    1. Gathers eligible thought experiments (voting/evaluating phase)
    2. Filters through ConstitutionalFilter
    3. Tallies democratic votes (human + agent, weighted)
    4. Selects top-N by approval score
    5. Dispatches each to its type-aware iteration loop

    Software experiments → autoresearch (edit→run→metric→iterate)
    Traditional experiments → reason_and_refine (hypothesize→score→refine)
    Physical AI experiments → observe_and_measure

    Args:
        max_experiments: Max experiments to dispatch (default 5)
        min_approval_score: Minimum weighted vote score to qualify (default 0.3)
        user_id: Who triggered the cycle

    Returns:
        JSON with session_id and status
    """
    orchestrator = get_auto_evolve_orchestrator()
    # Coerce explicitly — agent tool arguments may arrive as strings.
    outcome = orchestrator.start(
        max_experiments=int(max_experiments),
        min_approval_score=float(min_approval_score),
        user_id=user_id,
    )
    return json.dumps(outcome)
def get_auto_evolve_status() -> str:
    """Get the status of the current auto-evolve cycle.

    Returns progress including: candidates gathered, filtered, selected,
    dispatched, and per-experiment status.
    """
    status = get_auto_evolve_orchestrator().get_status()
    return json.dumps(status)
def pause_evolve_experiment(experiment_id: str, user_id: str) -> str:
    """Pause a running thought experiment's evolution loop.

    Only the experiment creator (owner) can pause their experiment.
    The experiment stays in 'evaluating' status but iteration stops.

    Args:
        experiment_id: The ThoughtExperiment ID to pause
        user_id: ID of the user requesting pause (must be creator)

    Returns:
        JSON with success status
    """
    return json.dumps(pause_experiment_evolution(experiment_id, user_id))
def resume_evolve_experiment(experiment_id: str, user_id: str) -> str:
    """Resume a paused thought experiment's evolution loop.

    Only the user who paused it can resume.

    Args:
        experiment_id: The ThoughtExperiment ID to resume
        user_id: ID of the user requesting resume (must be pauser)

    Returns:
        JSON with success status
    """
    return json.dumps(resume_experiment_evolution(experiment_id, user_id))
# Tool registration for ServiceToolRegistry — each entry maps an agent-facing
# tool name to its module-level wrapper function above. Descriptions are what
# the agent sees when choosing a tool; tags drive registry filtering.
AUTO_EVOLVE_TOOLS = [
    {
        'name': 'start_auto_evolve',
        'func': start_auto_evolve,
        'description': (
            'Start democratic auto-evolve cycle: gather thought experiments, '
            'constitutional filter, vote tally, dispatch winners to '
            'autonomous iteration loops.'
        ),
        'tags': ['auto_evolve', 'thought_experiment'],
    },
    {
        'name': 'get_auto_evolve_status',
        'func': get_auto_evolve_status,
        'description': 'Get progress of the current auto-evolve cycle.',
        'tags': ['auto_evolve'],
    },
    {
        'name': 'pause_evolve_experiment',
        'func': pause_evolve_experiment,
        'description': (
            'Pause a running thought experiment evolution (owner only). '
            'Stops iteration but keeps evaluating status.'
        ),
        'tags': ['auto_evolve', 'thought_experiment'],
    },
    {
        'name': 'resume_evolve_experiment',
        'func': resume_evolve_experiment,
        'description': 'Resume a paused thought experiment evolution (owner only).',
        'tags': ['auto_evolve', 'thought_experiment'],
    },
]