Coverage for integrations / agent_engine / pr_review_service.py: 73.5%
117 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
2PR Review Service — Coding agent reviews PRs against baselines
4Auto-approves simple changes with passing tests and no regression.
5Flags complex changes for steward review.
6Auto-rejects build breakers (test failures or baseline regressions).
8Decision Matrix:
9 Tests Pass + No Regression + Simple → AUTO-APPROVE
10 Tests Pass + No Regression + Complex → FLAG for steward
11 Tests Pass + Regression → AUTO-REJECT (build breaker)
12 Tests Fail → AUTO-REJECT (build breaker)
13"""
14import json
15import logging
16import os
17import subprocess
18from typing import Dict, List, Optional
20logger = logging.getLogger('hevolve_social')
class PRReviewService:
    """Coding agent that reviews PRs against baselines. Static methods only.

    Decision matrix (implemented by ``_decide``):

    * pass rate below ``MIN_PASS_RATE``          -> ``request_changes``
    * baseline regression                        -> ``request_changes``
    * tests pass, no regression, simple change   -> ``approve``
    * tests pass, no regression, anything else   -> ``flag_steward``
    """

    # Minimum fraction of executed tests that must pass for a PR to be
    # considered "tests passing".
    MIN_PASS_RATE = 0.95

    @staticmethod
    def review_pr(repo_url: str, pr_number: int) -> Dict:
        """Run the full PR review pipeline and post the outcome to GitHub.

        Steps:
            1. Fetch PR diff stats
            2. Run pre-commit checks (lint)
            3. Run test suite
            4. Validate baseline (no regression)
            5. Classify change complexity
            6. Decide: approve / flag_steward / request_changes

        Args:
            repo_url: URL of the GitHub repository that owns the PR.
            pr_number: Number of the pull request to review.

        Returns:
            ``{'decision': 'error', 'error': ...}`` when the diff stats could
            not be fetched; otherwise a dict with ``decision``, ``reason``,
            ``pr_number``, ``diff_stats``, ``precommit``, ``test_results``,
            ``baseline_validation``, ``change_complexity`` and
            ``review_posted`` (whether posting to GitHub succeeded).
        """
        # 1. Fetch PR diff
        diff_stats = PRReviewService._fetch_pr_diff(repo_url, pr_number)
        # Test for presence rather than truthiness: an empty error message
        # must never be mistaken for "no error" (that would let a failed
        # fetch be classified as a zero-line, auto-approvable change).
        fetch_error = diff_stats.get('error')
        if fetch_error is not None:
            return {'decision': 'error', 'error': fetch_error}

        # 2-4. Checks. NOTE(review): these run with the default repo_path
        # ('.'), i.e. against the current working directory; nothing here
        # checks out the PR branch -- confirm the caller does that first.
        # The pre-commit result is reported but does not affect the decision.
        precommit = PRReviewService.run_pre_commit_checks()
        test_results = PRReviewService.run_test_suite()
        baseline = PRReviewService.validate_baseline()

        # 5. Classify complexity
        complexity = PRReviewService.classify_change(diff_stats)

        # 6. Decision
        decision, reason = PRReviewService._decide(
            test_results, baseline, complexity)

        review = {
            'decision': decision,
            'reason': reason,
            'pr_number': pr_number,
            'diff_stats': diff_stats,
            'precommit': precommit,
            'test_results': test_results,
            'baseline_validation': baseline,
            'change_complexity': complexity,
        }

        # Post review to GitHub; a posting failure must not lose the review.
        try:
            posted = PRReviewService.post_review(repo_url, pr_number, review)
        except Exception as e:
            logger.debug("Failed to post review: %s", e)
            posted = False
        review['review_posted'] = bool(posted)

        return review

    @staticmethod
    def _decide(test_results: Dict, baseline: Dict, complexity: str) -> tuple:
        """Apply the decision matrix and return ``(decision, reason)``."""
        pass_rate = test_results.get('pass_rate') or 0
        if pass_rate < PRReviewService.MIN_PASS_RATE:
            return 'request_changes', 'Build breaker: tests failing'
        if not baseline.get('passed', True):
            return ('request_changes',
                    f'Baseline regression detected: '
                    f'{baseline.get("regressions", [])}')
        if complexity == 'simple':
            return 'approve', 'Simple change, tests pass, no regression'
        return ('flag_steward',
                f'Complex change ({complexity}), needs steward review')

    @staticmethod
    def _fetch_pr_diff(repo_url: str, pr_number: int) -> Dict:
        """Fetch PR diff stats via the ``gh`` CLI.

        Returns:
            On success, the parsed JSON object with ``files_changed``,
            ``additions`` and ``deletions``. On any failure, a dict whose
            ``error`` value is always a non-empty string.
        """
        try:
            from .private_repo_access import _extract_owner_repo
            owner_repo = _extract_owner_repo(repo_url)
            if not owner_repo:
                return {'error': f'Cannot parse repo: {repo_url}'}

            owner, repo = owner_repo

            result = subprocess.run(
                ['gh', 'api',
                 f'repos/{owner}/{repo}/pulls/{pr_number}',
                 '--jq',
                 '{files_changed: .changed_files, '
                 'additions: .additions, '
                 'deletions: .deletions}'],
                capture_output=True, text=True, timeout=30)

            if result.returncode == 0:
                stats = json.loads(result.stdout)
                if not isinstance(stats, dict):
                    raise ValueError(
                        f'Unexpected gh output: {result.stdout[:200]!r}')
                return stats

            # gh can fail without writing to stderr; fall back to the exit
            # code so the error is never an empty (falsy) string.
            message = (result.stderr or '')[:200]
            if not message.strip():
                message = f'gh api exited with code {result.returncode}'
            return {'error': message,
                    'files_changed': 0, 'additions': 0, 'deletions': 0}
        except Exception as e:
            return {'error': str(e) or type(e).__name__,
                    'files_changed': 0, 'additions': 0, 'deletions': 0}

    @staticmethod
    def run_pre_commit_checks(repo_path: str = '.') -> Dict:
        """Run lint checks (ruff, rule sets E/W/F) on ``repo_path``.

        Best-effort: if ruff is missing or cannot be run the check is
        skipped and reported as passed.

        Returns:
            ``{'passed': bool, 'issues': [{'tool', 'output'}, ...]}``.
        """
        issues = []

        try:
            result = subprocess.run(
                ['ruff', 'check', repo_path, '--select', 'E,W,F'],
                capture_output=True, text=True, timeout=120)
            if result.returncode != 0:
                issues.append({
                    'tool': 'ruff',
                    'output': result.stdout[:500],
                })
        except FileNotFoundError:
            logger.debug("ruff not installed; skipping lint check")
        except Exception as e:
            logger.debug("ruff check could not be run: %s", e)

        return {
            'passed': not issues,
            'issues': issues,
        }

    @staticmethod
    def _parse_pytest_summary(output: str) -> Optional[Dict]:
        """Extract passed/failed/errors counts from pytest terminal output.

        All counts are taken from the *last* line that contains any of
        them, so numbers printed earlier (e.g. by tests run with ``-s``)
        cannot be mixed into the final summary.

        Returns:
            ``{'passed': int, 'failed': int, 'errors': int}``, or ``None``
            when no line contains a count.
        """
        import re as _re
        count_re = _re.compile(r'(\d+)\s+(passed|failed|errors?)\b')
        for line in reversed(output.splitlines()):
            found = count_re.findall(line)
            if not found:
                continue
            counts = {'passed': 0, 'failed': 0, 'errors': 0}
            for number, label in found:
                key = 'errors' if label.startswith('error') else label
                counts[key] = int(number)
            return counts
        return None

    @staticmethod
    def run_test_suite(repo_path: str = '.') -> Dict:
        """Run pytest on ``tests/`` under ``repo_path`` and summarise it.

        Tests that error (e.g. failing fixtures) count against the pass
        rate in the same way as failed tests.

        Returns:
            Dict with ``passed``, ``failed``, ``errors``, ``total``,
            ``pass_rate`` (rounded to 4 places) and ``returncode``; if
            pytest could not be run, zero counts plus an ``error`` message.
        """
        try:
            import sys as _sys
            python = _sys.executable  # Always use current interpreter, not env var
            result = subprocess.run(
                [python, '-m', 'pytest', 'tests/', '-v', '-s',
                 '--tb=short', '-q'],
                capture_output=True, text=True, timeout=600,
                cwd=repo_path)

            # pytest writes its summary to stdout; only fall back to stderr
            # so that log noise on stderr cannot override the real summary.
            counts = (PRReviewService._parse_pytest_summary(result.stdout or '')
                      or PRReviewService._parse_pytest_summary(result.stderr or '')
                      or {'passed': 0, 'failed': 0, 'errors': 0})

            passed = counts['passed']
            failed = counts['failed']
            errors = counts['errors']
            total = passed + failed + errors
            # max(1, ...) keeps an empty run at pass_rate 0.0 (not a crash).
            pass_rate = passed / max(1, total)

            return {
                'passed': passed,
                'failed': failed,
                'errors': errors,
                'total': total,
                'pass_rate': round(pass_rate, 4),
                'returncode': result.returncode,
            }
        except Exception as e:
            return {
                'passed': 0, 'failed': 0, 'errors': 0, 'total': 0,
                'pass_rate': 0.0,
                'error': str(e),
            }

    @staticmethod
    def validate_baseline(repo_path: str = '.') -> Dict:
        """Validate every agent found under ``BASELINE_DIR`` against its baseline.

        Agent directories are expected to be named ``<prompt_id>_<flow_id>``
        with an integer ``flow_id``; other entries are skipped.
        ``repo_path`` is accepted for interface symmetry but is not used.

        Returns:
            ``{'passed': bool, 'regressions': [str, ...]}``. Fails open:
            when there is no baseline directory, or validation itself
            raises, ``passed`` is ``True`` (with ``reason`` / ``error`` set).
        """
        try:
            from .agent_baseline_service import AgentBaselineService, BASELINE_DIR
            from pathlib import Path

            baseline_dir = Path(BASELINE_DIR)
            if not baseline_dir.exists():
                return {'passed': True, 'regressions': [],
                        'reason': 'No baselines to validate'}

            all_regressions = []
            for agent_dir in baseline_dir.iterdir():
                if not agent_dir.is_dir():
                    continue
                parts = agent_dir.name.rsplit('_', 1)
                if len(parts) != 2:
                    continue
                prompt_id, flow_id_str = parts
                try:
                    flow_id = int(flow_id_str)
                except ValueError:
                    continue

                result = AgentBaselineService.validate_against_baseline(
                    prompt_id, flow_id)
                if result and not result.get('passed', True):
                    all_regressions.extend(
                        f'{agent_dir.name}: {r}'
                        for r in result.get('regressions', []))

            return {
                'passed': not all_regressions,
                'regressions': all_regressions,
            }
        except Exception as e:
            # Deliberately fail-open, but leave a trace of why.
            logger.warning("Baseline validation could not be run: %s", e)
            return {'passed': True, 'regressions': [],
                    'error': str(e)}

    @staticmethod
    def classify_change(diff_stats: dict) -> str:
        """Classify change complexity from diff stats.

        simple:   <= 3 files and < 100 changed lines
        moderate: <= 10 files and < 500 changed lines
        complex:  > 10 files or >= 500 changed lines

        A 'simple' change is bumped to 'moderate' when
        ``diff_stats['guardian_violations']`` is greater than zero.
        Missing or ``None`` stats are treated as 0.
        """
        files = diff_stats.get('files_changed') or 0
        lines = ((diff_stats.get('additions') or 0) +
                 (diff_stats.get('deletions') or 0))

        if files <= 3 and lines < 100:
            base = 'simple'
        elif files <= 10 and lines < 500:
            base = 'moderate'
        else:
            base = 'complex'

        # If guardian analysis found violations, bump to at least moderate
        if base == 'simple' and (diff_stats.get('guardian_violations') or 0) > 0:
            return 'moderate'

        return base

    @staticmethod
    def enhanced_review(changed_files: List[Dict]) -> Dict:
        """Run PRGuardian analysis on changed files.

        Args:
            changed_files: List of {filename, source} dicts.

        Returns:
            PRGuardian analysis report with an added ``review_comment``,
            or ``{'error': ..., 'passed': True}`` if the analysis could
            not be run.
        """
        try:
            from core.platform.pr_guardian import PRGuardian
            report = PRGuardian.analyze_diff('', changed_files)
            report['review_comment'] = PRGuardian.generate_review_comment(report)
            return report
        except Exception as e:
            logger.debug("PRGuardian analysis unavailable: %s", e)
            return {'error': str(e), 'passed': True}

    @staticmethod
    def _format_review_body(review: dict) -> str:
        """Render the Markdown comment body for a review dict."""
        decision = review.get('decision', 'flag_steward')
        tests = review.get('test_results', {})
        baseline = review.get('baseline_validation', {})

        parts = [
            "## Automated PR Review\n\n",
            f"**Decision**: {decision.upper()}\n",
            f"**Reason**: {review.get('reason', 'N/A')}\n\n",
            "### Test Results\n",
            f"- Passed: {tests.get('passed', '?')}\n",
            f"- Failed: {tests.get('failed', '?')}\n",
            f"- Pass Rate: {tests.get('pass_rate', '?')}\n\n",
            "### Baseline Validation\n",
            f"- Passed: {baseline.get('passed', '?')}\n",
        ]

        regressions = baseline.get('regressions', [])
        if regressions:
            parts.append("- Regressions:\n")
            # Cap the list so the comment stays readable.
            parts.extend(f" - {r}\n" for r in regressions[:10])

        parts.append(
            f"\n### Change Complexity: "
            f"{review.get('change_complexity', '?')}\n\n"
            f"*Automated by HART Coding Agent*"
        )
        return "".join(parts)

    @staticmethod
    def post_review(
        repo_url: str,
        pr_number: int,
        review: dict,
    ):
        """Post review to GitHub via gh CLI.

        Args:
            repo_url: URL of the GitHub repository that owns the PR.
            pr_number: Number of the pull request.
            review: Review dict as produced by ``review_pr``.

        Returns:
            ``True`` if ``gh`` reported success, ``False`` if the repo URL
            could not be parsed, ``gh`` could not be run, or it exited
            with a non-zero status.
        """
        from .private_repo_access import _extract_owner_repo
        owner_repo = _extract_owner_repo(repo_url)
        if not owner_repo:
            return False

        owner, repo = owner_repo
        decision = review.get('decision', 'flag_steward')

        # Map decision to GitHub review event; unknown decisions are posted
        # as a plain comment.
        event_map = {
            'approve': 'APPROVE',
            'request_changes': 'REQUEST_CHANGES',
            'flag_steward': 'COMMENT',
        }
        event = event_map.get(decision, 'COMMENT')
        body = PRReviewService._format_review_body(review)

        try:
            result = subprocess.run(
                ['gh', 'api', '--method', 'POST',
                 f'repos/{owner}/{repo}/pulls/{pr_number}/reviews',
                 '-f', f'event={event}',
                 '-f', f'body={body}'],
                capture_output=True, text=True, timeout=30)
        except Exception as e:
            logger.debug("Post review failed: %s", e)
            return False

        if result.returncode != 0:
            logger.debug("Post review failed (gh exit %s): %s",
                         result.returncode, (result.stderr or '')[:200])
            return False
        return True