Coverage for integrations / agent_engine / pr_review_service.py: 73.5%

117 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

"""
PR Review Service — Coding agent reviews PRs against baselines

Auto-approves simple changes with passing tests and no regression.
Flags moderate and complex changes for steward review.
Auto-rejects build breakers (test failures or baseline regressions) by
requesting changes on the PR.

Decision Matrix:
    Tests Pass + No Regression + Simple           → AUTO-APPROVE
    Tests Pass + No Regression + Moderate/Complex → FLAG for steward
    Tests Pass + Regression                       → AUTO-REJECT (build breaker)
    Tests Fail                                    → AUTO-REJECT (build breaker)
"""

import json
import logging
import os
import re
import subprocess
import sys
from typing import Dict, List, Optional

19 

logger = logging.getLogger('hevolve_social')


class PRReviewService:
    """Coding agent reviews PRs against baselines. Static methods only.

    The public entry point is :meth:`review_pr`; the remaining public
    methods are the individual pipeline stages and can be called on their
    own. All stages report failures through their returned dicts rather
    than by raising.
    """

    # Minimum fraction of executed tests that must pass for the suite to be
    # considered green.
    _MIN_PASS_RATE = 0.95

    # One "<count> <outcome>" token of pytest's final summary line, e.g.
    # "3 failed, 95 passed, 2 errors in 4.00s". "xpassed"/"xfailed" do not
    # match because the outcome must directly follow the whitespace.
    _PYTEST_COUNT_RE = re.compile(r'(\d+)\s+(passed|failed|errors?)\b')

    @staticmethod
    def review_pr(repo_url: str, pr_number: int) -> Dict:
        """Full PR review pipeline.

        1. Fetch PR diff stats
        2. Run pre-commit checks (lint, format)
        3. Run test suite
        4. Validate baseline (no regression)
        5. Classify change complexity
        6. Decide: approve / flag_steward / request_changes
        7. Post the review to GitHub (best effort)

        Args:
            repo_url: URL of the GitHub repository that owns the PR.
            pr_number: Number of the pull request to review.

        Returns:
            ``{'decision': 'error', 'error': ...}`` when the diff stats
            cannot be fetched. Otherwise a dict with ``decision``,
            ``reason``, ``pr_number``, ``diff_stats``, ``precommit``,
            ``test_results``, ``baseline_validation`` and
            ``change_complexity``; ``review_posted: False`` is added only
            when posting the review to GitHub failed.
        """
        # 1. Fetch PR diff
        diff_stats = PRReviewService._fetch_pr_diff(repo_url, pr_number)
        if diff_stats.get('error'):
            return {'decision': 'error', 'error': diff_stats['error']}

        # NOTE(review): stages 2-4 use their default repo_path ('.'), i.e.
        # the current working directory. Nothing in this method checks out
        # the PR branch — confirm the caller has done so.

        # 2. Pre-commit checks (reported in the result; they do not
        #    influence the decision)
        precommit = PRReviewService.run_pre_commit_checks()

        # 3. Test suite
        test_results = PRReviewService.run_test_suite()

        # 4. Baseline validation
        baseline = PRReviewService.validate_baseline()

        # 5. Classify complexity
        complexity = PRReviewService.classify_change(diff_stats)

        # 6. Decision
        verdict = PRReviewService._decide(test_results, baseline, complexity)

        review = {
            'decision': verdict['decision'],
            'reason': verdict['reason'],
            'pr_number': pr_number,
            'diff_stats': diff_stats,
            'precommit': precommit,
            'test_results': test_results,
            'baseline_validation': baseline,
            'change_complexity': complexity,
        }

        # 7. Post review to GitHub. A failure to post must not lose the
        #    review itself, so it is only recorded on the result.
        try:
            posted = PRReviewService.post_review(repo_url, pr_number, review)
        except Exception as e:
            logger.warning("Failed to post review: %s", e)
            posted = False
        # Compare against False explicitly so the key stays absent on
        # success, exactly as before.
        if posted is False:
            review['review_posted'] = False

        return review

    @staticmethod
    def _decide(test_results: Dict, baseline: Dict, complexity: str) -> Dict:
        """Apply the decision matrix to the outcome of the pipeline stages.

        Args:
            test_results: Result of :meth:`run_test_suite`.
            baseline: Result of :meth:`validate_baseline`.
            complexity: Result of :meth:`classify_change`.

        Returns:
            ``{'decision': ..., 'reason': ...}`` where decision is one of
            ``'request_changes'``, ``'approve'`` or ``'flag_steward'``.
        """
        pass_rate = test_results.get('pass_rate', 0) or 0
        if pass_rate < PRReviewService._MIN_PASS_RATE:
            return {'decision': 'request_changes',
                    'reason': 'Build breaker: tests failing'}

        # A missing 'passed' key counts as "no regression".
        if not baseline.get('passed', True):
            return {'decision': 'request_changes',
                    'reason': (f'Baseline regression detected: '
                               f'{baseline.get("regressions", [])}')}

        if complexity == 'simple':
            return {'decision': 'approve',
                    'reason': 'Simple change, tests pass, no regression'}

        return {'decision': 'flag_steward',
                'reason': (f'Complex change ({complexity}), '
                           f'needs steward review')}

    @staticmethod
    def _fetch_pr_diff(repo_url: str, pr_number: int) -> Dict:
        """Fetch PR diff stats via gh CLI.

        Returns:
            ``{'files_changed', 'additions', 'deletions'}`` on success.
            On failure the dict carries a non-empty ``'error'`` string
            (callers use its truthiness to detect failure).
        """
        empty_stats = {'files_changed': 0, 'additions': 0, 'deletions': 0}
        try:
            from .private_repo_access import _extract_owner_repo
            owner_repo = _extract_owner_repo(repo_url)
            if not owner_repo:
                return {'error': f'Cannot parse repo: {repo_url}'}

            owner, repo = owner_repo

            result = subprocess.run(
                ['gh', 'api',
                 f'repos/{owner}/{repo}/pulls/{pr_number}',
                 '--jq',
                 '{files_changed: .changed_files, '
                 'additions: .additions, '
                 'deletions: .deletions}'],
                capture_output=True, text=True, timeout=30)

            if result.returncode != 0:
                # stderr can be empty; an empty 'error' would look like
                # success to the caller, so always supply a message.
                message = (result.stderr.strip()[:200]
                           or f'gh api exited with code {result.returncode}')
                return {'error': message, **empty_stats}

            stats = json.loads(result.stdout)
            if not isinstance(stats, dict):
                return {'error': 'Unexpected gh api output', **empty_stats}
            return stats
        except Exception as e:
            return {'error': str(e) or type(e).__name__, **empty_stats}

    @staticmethod
    def run_pre_commit_checks(repo_path: str = '.') -> Dict:
        """Run lint and format checks.

        Currently runs ``ruff check`` with the E, W and F rule sets. The
        check is best effort: if ruff cannot be run (for example because it
        is not installed) no issue is recorded.

        Args:
            repo_path: Path handed to ruff.

        Returns:
            ``{'passed': bool, 'issues': [{'tool', 'output'}, ...]}``.
        """
        issues = []

        try:
            result = subprocess.run(
                ['ruff', 'check', repo_path, '--select', 'E,W,F'],
                capture_output=True, text=True, timeout=120)
            if result.returncode != 0:
                issues.append({
                    'tool': 'ruff',
                    'output': result.stdout[:500],
                })
        except Exception as e:
            # Includes FileNotFoundError when ruff is not installed.
            logger.debug("ruff check skipped: %s", e)

        return {
            'passed': not issues,
            'issues': issues,
        }

    @staticmethod
    def _parse_pytest_summary(output: str) -> Dict[str, int]:
        """Extract passed/failed/errors counts from pytest output.

        Only the last line containing a count is used, so counts are never
        mixed across lines (test output is captured too because the suite
        runs with ``-s``).

        Returns:
            ``{'passed': int, 'failed': int, 'errors': int}``; all zero
            when no count is found.
        """
        counts = {'passed': 0, 'failed': 0, 'errors': 0}
        for line in reversed(output.splitlines()):
            matches = PRReviewService._PYTEST_COUNT_RE.findall(line)
            if not matches:
                continue
            for number, outcome in matches:
                key = 'errors' if outcome.startswith('error') else outcome
                counts[key] = int(number)
            break
        return counts

    @staticmethod
    def run_test_suite(repo_path: str = '.') -> Dict:
        """Run pytest and capture results.

        Args:
            repo_path: Working directory for pytest; ``tests/`` is resolved
                relative to it.

        Returns:
            ``{'passed', 'failed', 'errors', 'total', 'pass_rate',
            'returncode'}``. Errors count towards ``total`` so that they
            lower the pass rate. If pytest cannot be run, all counts are 0,
            ``pass_rate`` is 0.0 and ``'error'`` holds the message.
        """
        try:
            result = subprocess.run(
                # Always use the current interpreter, not an env var.
                [sys.executable, '-m', 'pytest', 'tests/', '-v', '-s',
                 '--tb=short', '-q'],
                capture_output=True, text=True, timeout=600,
                cwd=repo_path)

            counts = PRReviewService._parse_pytest_summary(
                result.stdout + result.stderr)
            total = counts['passed'] + counts['failed'] + counts['errors']
            # max(1, ...) keeps an empty run at a pass rate of 0.0.
            pass_rate = counts['passed'] / max(1, total)

            return {
                'passed': counts['passed'],
                'failed': counts['failed'],
                'errors': counts['errors'],
                'total': total,
                'pass_rate': round(pass_rate, 4),
                'returncode': result.returncode,
            }
        except Exception as e:
            return {
                'passed': 0, 'failed': 0, 'errors': 0, 'total': 0,
                'pass_rate': 0.0,
                'error': str(e),
            }

    @staticmethod
    def validate_baseline(repo_path: str = '.') -> Dict:
        """Validate all active agents against their baselines.

        Every sub-directory of ``BASELINE_DIR`` named
        ``<prompt_id>_<flow_id>`` (``flow_id`` an integer) is validated via
        ``AgentBaselineService.validate_against_baseline``.

        Args:
            repo_path: Currently unused.

        Returns:
            ``{'passed': bool, 'regressions': [str, ...]}``. This check
            fails open: when there is no baseline directory, or validation
            itself raises, ``passed`` is True (with ``'reason'`` or
            ``'error'`` explaining why).
        """
        try:
            from .agent_baseline_service import AgentBaselineService, BASELINE_DIR
            from pathlib import Path

            baseline_dir = Path(BASELINE_DIR)
            if not baseline_dir.exists():
                return {'passed': True, 'regressions': [],
                        'reason': 'No baselines to validate'}

            all_regressions = []
            for agent_dir in baseline_dir.iterdir():
                if not agent_dir.is_dir():
                    continue
                # Split on the last underscore: prompt ids may contain '_'.
                prompt_id, sep, flow_id_str = agent_dir.name.rpartition('_')
                if not sep:
                    continue
                try:
                    flow_id = int(flow_id_str)
                except ValueError:
                    continue

                result = AgentBaselineService.validate_against_baseline(
                    prompt_id, flow_id)
                if result and not result.get('passed', True):
                    all_regressions.extend(
                        f'{agent_dir.name}: {r}'
                        for r in result.get('regressions', []))

            return {
                'passed': not all_regressions,
                'regressions': all_regressions,
            }
        except Exception as e:
            logger.warning("Baseline validation could not run: %s", e)
            return {'passed': True, 'regressions': [],
                    'error': str(e)}

    @staticmethod
    def classify_change(diff_stats: dict) -> str:
        """Classify change complexity.

        simple:   <= 3 files and < 100 changed lines
        moderate: <= 10 files and < 500 changed lines
        complex:  everything else (> 10 files or >= 500 changed lines)

        Changed lines are additions plus deletions. A 'simple' change is
        bumped to 'moderate' when ``diff_stats['guardian_violations']`` is
        greater than zero. Missing or ``None`` counts are treated as 0.

        Args:
            diff_stats: Dict with optional ``files_changed``, ``additions``,
                ``deletions`` and ``guardian_violations`` entries.

        Returns:
            ``'simple'``, ``'moderate'`` or ``'complex'``.
        """
        # `or 0` also covers JSON nulls, which arrive as None.
        files = diff_stats.get('files_changed') or 0
        lines = ((diff_stats.get('additions') or 0) +
                 (diff_stats.get('deletions') or 0))

        if files <= 3 and lines < 100:
            base = 'simple'
        elif files <= 10 and lines < 500:
            base = 'moderate'
        else:
            base = 'complex'

        # If guardian analysis found violations, bump to at least moderate
        has_violations = (diff_stats.get('guardian_violations') or 0) > 0
        if has_violations and base == 'simple':
            return 'moderate'

        return base

    @staticmethod
    def enhanced_review(changed_files: List[Dict]) -> Dict:
        """Run PRGuardian analysis on changed files.

        Args:
            changed_files: List of {filename, source} dicts.

        Returns:
            PRGuardian analysis report with an added ``'review_comment'``
            entry, or ``{'error': ..., 'passed': True}`` if the analysis
            could not be run.
        """
        try:
            from core.platform.pr_guardian import PRGuardian
            report = PRGuardian.analyze_diff('', changed_files)
            report['review_comment'] = PRGuardian.generate_review_comment(
                report)
            return report
        except Exception as e:
            logger.debug("PRGuardian analysis failed: %s", e)
            return {'error': str(e), 'passed': True}

    @staticmethod
    def _format_review_body(review: dict) -> str:
        """Render the Markdown body of the GitHub review comment.

        Missing entries are rendered as ``?`` (``N/A`` for the reason). At
        most the first 10 baseline regressions are listed.
        """
        decision = review.get('decision') or 'flag_steward'
        tests = review.get('test_results') or {}
        baseline = review.get('baseline_validation') or {}

        lines = [
            "## Automated PR Review",
            "",
            f"**Decision**: {decision.upper()}",
            f"**Reason**: {review.get('reason', 'N/A')}",
            "",
            "### Test Results",
            f"- Passed: {tests.get('passed', '?')}",
            f"- Failed: {tests.get('failed', '?')}",
            f"- Errors: {tests.get('errors', '?')}",
            f"- Pass Rate: {tests.get('pass_rate', '?')}",
            "",
            "### Baseline Validation",
            f"- Passed: {baseline.get('passed', '?')}",
        ]

        regressions = baseline.get('regressions') or []
        if regressions:
            lines.append("- Regressions:")
            # Two-space indent nests the items under the bullet above.
            lines.extend(f"  - {r}" for r in regressions[:10])

        lines += [
            "",
            f"### Change Complexity: {review.get('change_complexity', '?')}",
            "",
            "*Automated by HART Coding Agent*",
        ]
        return "\n".join(lines)

    @staticmethod
    def post_review(
        repo_url: str,
        pr_number: int,
        review: dict,
    ) -> bool:
        """Post review to GitHub via gh CLI.

        The decision is mapped to a GitHub review event: ``approve`` →
        APPROVE, ``request_changes`` → REQUEST_CHANGES, anything else →
        COMMENT.

        Args:
            repo_url: URL of the GitHub repository that owns the PR.
            pr_number: Number of the pull request.
            review: Review dict as built by :meth:`review_pr`.

        Returns:
            True if gh reported success; False if the repo URL could not
            be parsed, gh could not be run, or gh exited non-zero.
        """
        from .private_repo_access import _extract_owner_repo
        owner_repo = _extract_owner_repo(repo_url)
        if not owner_repo:
            return False

        owner, repo = owner_repo
        decision = review.get('decision') or 'flag_steward'

        # Map decision to GitHub review event
        event_map = {
            'approve': 'APPROVE',
            'request_changes': 'REQUEST_CHANGES',
            'flag_steward': 'COMMENT',
        }
        event = event_map.get(decision, 'COMMENT')
        body = PRReviewService._format_review_body(review)

        try:
            result = subprocess.run(
                ['gh', 'api', '--method', 'POST',
                 f'repos/{owner}/{repo}/pulls/{pr_number}/reviews',
                 '-f', f'event={event}',
                 '-f', f'body={body}'],
                capture_output=True, text=True, timeout=30)
        except Exception as e:
            logger.warning("Post review failed: %s", e)
            return False

        # gh signals API errors through its exit code, not by raising.
        if result.returncode != 0:
            logger.warning("Post review failed (gh exit code %s): %s",
                           result.returncode, result.stderr[:200])
            return False
        return True