Coverage for integrations / agent_engine / auto_deploy_service.py: 0.0%

147 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2Auto Deploy Service — Build, sign, and deploy on PR merge 

3 

4Triggered when a PR merges to main. Runs full test suite, captures benchmark 

5snapshot, verifies safety via is_upgrade_safe(), signs the release manifest 

6using scripts/sign_release.py, and notifies all nodes via gossip protocol. 

7 

8Each node receives the version notification and auto-updates after verifying 

9the release manifest signature. 

10""" 

11import json 

12import logging 

13import os 

14import subprocess 

15import sys 

16import time 

17 

# Extra keyword arguments splatted into every subprocess.run() call in this
# module.  On Windows this carries CREATE_NO_WINDOW so child processes do not
# open a console window; on every other platform it is an empty dict.
_SUBPROCESS_KW = (
    {'creationflags': subprocess.CREATE_NO_WINDOW}
    if sys.platform == 'win32'
    else {}
)

22from typing import Dict, Optional 

23 

24logger = logging.getLogger('hevolve_social') 

25 

26 

class AutoDeployService:
    """Build, sign and roll out a release when a PR merges to main.

    Static methods only.  ``on_pr_merged`` runs on the node that observes the
    merge; ``auto_update_node`` runs on each peer that receives the resulting
    version notification.
    """

    # Characters allowed in a full or abbreviated git object name.
    _HEX_DIGITS = frozenset('0123456789abcdefABCDEF')

    @staticmethod
    def _is_commit_sha(value) -> bool:
        """Return True if *value* is a string of 4-64 hexadecimal characters.

        Used to vet a commit id before it is placed on a ``git`` command
        line: a purely hexadecimal value can never start with ``-`` and so
        cannot be interpreted by git as an option.
        """
        return (
            isinstance(value, str)
            and 4 <= len(value) <= 64
            and all(ch in AutoDeployService._HEX_DIGITS for ch in value)
        )

    @staticmethod
    def _run_git(*args: str, timeout: int) -> 'subprocess.CompletedProcess':
        """Run ``git <args>`` with captured text output.

        A non-zero exit status is reported through ``returncode`` rather than
        raised; launch failures and timeouts propagate as exceptions.
        """
        return subprocess.run(
            ['git', *args],
            capture_output=True, text=True, timeout=timeout,
            **_SUBPROCESS_KW)

    @staticmethod
    def on_pr_merged(repo_url: str, merge_sha: str) -> Dict:
        """Triggered by GitHub webhook or polling.

        1. Pull latest code
        2. Run full test suite (regression gate)
        3. Capture benchmark snapshot for new version
        4. Compare vs previous version (is_upgrade_safe)
        5. If safe: sign release manifest
        6. Distribute update notification via gossip

        Args:
            repo_url: Repository the merge happened in (not used by the
                current implementation).
            merge_sha: Commit id of the merge; its first 8 characters become
                the release version.

        Returns:
            Dict with ``merge_sha``, ``deployed`` and a ``steps`` sub-dict
            holding the outcome of every step that ran.  On failure an
            ``error`` message is set and ``deployed`` stays False; on success
            ``version`` and ``nodes_notified`` are added.
        """
        result = {
            'merge_sha': merge_sha,
            'deployed': False,
            'steps': {},
        }
        steps = result['steps']

        # Reject an unusable SHA before doing any work: it is sliced below to
        # form the version and is embedded in the manifest that peers use to
        # pin their checkout.
        if not isinstance(merge_sha, str) or not merge_sha.strip():
            result['error'] = 'Invalid merge SHA'
            return result

        # 1. Pull latest code
        try:
            pull = AutoDeployService._run_git(
                'pull', 'origin', 'main', timeout=120)
            pulled = pull.returncode == 0
            steps['git_pull'] = {
                'success': pulled,
                'output': pull.stdout[:200],
            }
            if not pulled:
                # git reports failures on stderr, so keep a snippet of it.
                steps['git_pull']['error'] = pull.stderr[:200]
                result['error'] = 'Git pull failed'
                return result
        except Exception as e:
            steps['git_pull'] = {'success': False, 'error': str(e)}
            result['error'] = f'Git pull failed: {e}'
            return result

        # 2. Run test suite (deploy requires a pass rate of at least 95%)
        try:
            from .pr_review_service import PRReviewService
            test_results = PRReviewService.run_test_suite()
            steps['tests'] = test_results

            if test_results.get('pass_rate', 0) < 0.95:
                result['error'] = (
                    f"Test suite failed: {test_results.get('failed', 0)} "
                    f"failures, pass_rate={test_results.get('pass_rate', 0)}")
                return result
        except Exception as e:
            steps['tests'] = {'error': str(e)}
            result['error'] = f'Test suite error: {e}'
            return result

        # 3. Capture benchmark snapshot (best effort; failure is recorded only)
        new_version = merge_sha[:8]
        try:
            from .benchmark_registry import get_benchmark_registry
            registry = get_benchmark_registry()
            snapshot = registry.capture_snapshot(
                version=new_version, tier='fast')
            steps['benchmark'] = {
                'version': new_version,
                'captured': bool(snapshot),
            }
        except Exception as e:
            steps['benchmark'] = {'error': str(e)}

        # 4. Check upgrade safety
        try:
            from .benchmark_registry import get_benchmark_registry
            registry = get_benchmark_registry()
            safe = registry.is_upgrade_safe(new_version)
            steps['upgrade_safe'] = safe

            if not safe.get('safe', True):
                result['error'] = (
                    f"Upgrade not safe: {safe.get('regressions', [])}")
                return result
        except Exception as e:
            steps['upgrade_safe'] = {'error': str(e)}
            # Continue — missing benchmark data should not block deploy

        # 5. Sign release manifest — MUST succeed before gossip
        manifest = None
        try:
            manifest = AutoDeployService._sign_release(new_version, merge_sha)
            steps['sign'] = {
                'signed': manifest is not None and manifest.get('signed', False),
            }
        except Exception as e:
            steps['sign'] = {'error': str(e)}

        if not manifest or not manifest.get('signed') or not manifest.get('signature'):
            result['error'] = 'Release signing failed — aborting deploy (no unsigned gossip)'
            result['deployed'] = False
            return result

        # 6. Notify nodes via gossip (signed manifest only).  A gossip error
        # is recorded but does not undo the deploy.
        nodes_notified = 0
        try:
            nodes_notified = AutoDeployService.notify_nodes(
                new_version, manifest)
            steps['gossip'] = {
                'nodes_notified': nodes_notified,
            }
        except Exception as e:
            steps['gossip'] = {'error': str(e)}

        result['deployed'] = True
        result['version'] = new_version
        result['nodes_notified'] = nodes_notified
        return result

    @staticmethod
    def _sign_release(version: str, merge_sha: str) -> Optional[Dict]:
        """Sign release manifest using scripts/sign_release.py.

        Builds a manifest with ``version``, ``merge_sha`` and ``timestamp``,
        adds ``code_hash`` when the integrity module is available, then runs
        the signing script (path relative to the current working directory)
        with the interpreter named by ``HEVOLVE_PYTHON`` (default ``python``).

        Returns:
            The manifest.  ``signed`` is set when the script exits with 0 and
            ``signature`` when its output contains a ``signature=<value>``
            line.  If the script is missing or fails, the manifest is
            returned without those keys and the caller must treat it as
            unsigned.
        """
        manifest = {
            'version': version,
            'merge_sha': merge_sha,
            'timestamp': time.time(),
        }

        # Code hash is optional: the manifest is still built without it.
        try:
            from security.node_integrity import compute_code_hash
            manifest['code_hash'] = compute_code_hash()
        except Exception as e:
            logger.debug("Code hash unavailable for release manifest: %s", e)

        # Sign via release script if available
        script_path = os.path.join('scripts', 'sign_release.py')
        if not os.path.exists(script_path):
            logger.warning(
                "Release signing script not found at %s", script_path)
            return manifest

        try:
            python = os.environ.get('HEVOLVE_PYTHON', 'python')
            proc = subprocess.run(
                [python, script_path, '--version', version],
                capture_output=True, text=True, timeout=60,
                **_SUBPROCESS_KW)
            if proc.returncode == 0:
                manifest['signed'] = True
                # The last "signature=" line wins; surrounding whitespace is
                # not part of the signature.
                for line in proc.stdout.splitlines():
                    if line.startswith('signature='):
                        manifest['signature'] = line.split('=', 1)[1].strip()
            else:
                logger.warning(
                    "Release signing exited with %s: %s",
                    proc.returncode, proc.stderr[:200])
        except Exception as e:
            logger.warning("Release signing failed: %s", e)

        return manifest

    @staticmethod
    def notify_nodes(version: str, manifest: dict) -> int:
        """Use gossip protocol to notify all peers of new version.

        Each notification is signed with this node's Ed25519 key so peers
        can verify the sender before accepting the update.  If the identity
        or key is unavailable the notification is still sent, with
        ``sender_signature`` set to None.

        Args:
            version: Version string being announced.
            manifest: Release manifest forwarded to every peer.

        Returns:
            Number of active peers that answered HTTP 200.
        """
        notified = 0

        # Sign the notification payload with this node's key
        node_signature = None
        node_id = None
        try:
            from security.node_integrity import get_node_identity
            identity = get_node_identity()
            node_id = identity.get('node_id', '')
            payload_bytes = json.dumps(
                {'version': version, 'manifest_hash': manifest.get('code_hash', '')},
                sort_keys=True).encode()
            from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
            private_key = identity.get('_private_key')
            if private_key and isinstance(private_key, Ed25519PrivateKey):
                node_signature = private_key.sign(payload_bytes).hex()
        except Exception as e:
            logger.debug("Node signature for gossip failed: %s", e)

        try:
            from core.http_pool import pooled_post
            from integrations.social.models import get_db, PeerNode
            db = get_db()
            try:
                message = {
                    'type': 'version_update',
                    'version': version,
                    'manifest': manifest,
                    'sender_node_id': node_id,
                    'sender_signature': node_signature,
                }
                peers = db.query(PeerNode).filter(
                    PeerNode.status == 'active').all()
                for peer in peers:
                    if not peer.url:
                        continue
                    # One unreachable peer must not stop the others.
                    try:
                        base_url = str(peer.url).rstrip('/')
                        resp = pooled_post(
                            f'{base_url}/api/social/deploy/version-update',
                            json=message,
                            timeout=10,
                        )
                        if resp.status_code == 200:
                            notified += 1
                    except Exception as e:
                        logger.debug(
                            "Version notification to %s failed: %s",
                            peer.url, e)
            finally:
                db.close()
        except Exception as e:
            logger.debug("Node notification failed: %s", e)

        logger.info("Version update %s: notified %s nodes", version, notified)
        return notified

    @staticmethod
    def auto_update_node(version: str, manifest: dict) -> Dict:
        """Called on each node when update notification received.

        1. Verify release manifest signature
        2. Compare code_hash
        3. If different + verified: fetch and check out the manifest's commit
        4. Restart services gracefully

        Args:
            version: Version announced by the sender.
            manifest: Release manifest received with the notification.

        Returns:
            Dict with ``updated`` and ``version``.  A rejected or failed
            update carries ``error``; an unchanged node carries ``reason``.
            A successful update adds ``old_version`` and ``new_version``,
            plus ``old_hash`` and ``new_hash`` when the hashes could be
            computed.
        """
        result = {'updated': False, 'version': version}

        # 1. Verify manifest signature — ALWAYS required (no unsigned bypass).
        # A payload that is not a dict cannot carry a signature either.
        if (not isinstance(manifest, dict)
                or not manifest.get('signed')
                or not manifest.get('signature')):
            result['error'] = 'Unsigned manifest rejected — signature required'
            return result
        try:
            from security.master_key import verify_release_manifest
            if not verify_release_manifest(manifest):
                result['error'] = 'Invalid release manifest signature'
                return result
        except ImportError:
            result['error'] = 'Security module unavailable for verification'
            return result
        except Exception as e:
            result['error'] = f'Manifest verification failed: {e}'
            return result

        # 2. Compare code hash (best effort; on failure the update proceeds)
        try:
            from security.node_integrity import compute_code_hash
            current_hash = compute_code_hash()
            manifest_hash = manifest.get('code_hash', '')
            if current_hash == manifest_hash:
                result['reason'] = 'Already up to date'
                return result
            result['old_hash'] = current_hash
            result['new_hash'] = manifest_hash
        except Exception as e:
            logger.debug("Code hash comparison skipped: %s", e)

        # 3. Fetch and checkout pinned commit (prevent TOCTOU)
        manifest_sha = manifest.get('merge_sha', '')
        if manifest_sha and not AutoDeployService._is_commit_sha(manifest_sha):
            # The value goes onto a git command line; anything that is not a
            # plain hex commit id (e.g. starting with "-") is refused.
            result['error'] = 'Invalid merge_sha in manifest'
            return result
        try:
            fetch = AutoDeployService._run_git(
                'fetch', 'origin', 'main', timeout=120)
            if fetch.returncode != 0:
                result['error'] = f'Git fetch failed: {fetch.stderr[:200]}'
                return result
            if manifest_sha:
                checkout = AutoDeployService._run_git(
                    'checkout', manifest_sha, timeout=30)
                if checkout.returncode != 0:
                    result['error'] = f'Checkout pinned SHA failed: {checkout.stderr[:200]}'
                    return result
            else:
                # No commit in the manifest: fall back to the tip of main.
                # NOTE(review): this path is not pinned to a commit.
                pull = AutoDeployService._run_git(
                    'pull', 'origin', 'main', timeout=120)
                if pull.returncode != 0:
                    result['error'] = f'Git pull failed: {pull.stderr[:200]}'
                    return result
        except Exception as e:
            result['error'] = f'Git update failed: {e}'
            return result

        # 4. Graceful restart via watchdog (a failed request is only logged)
        try:
            from security.node_watchdog import NodeWatchdog
            watchdog = NodeWatchdog.get_instance()
            if watchdog:
                watchdog.request_restart('version_update')
        except Exception as e:
            logger.debug("Watchdog restart request failed: %s", e)

        result['updated'] = True
        # old_hash may be missing or not a string; never let that crash here.
        result['old_version'] = str(result.get('old_hash') or 'unknown')[:8]
        result['new_version'] = version
        return result

319 result['old_version'] = result.get('old_hash', 'unknown')[:8] 

320 result['new_version'] = version 

321 return result