Coverage for core / subprocess_safe.py: 37.5%

48 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-12 04:49 +0000

1""" 

2core.subprocess_safe — bounded external-command execution. 

3 

4WHY THIS EXISTS 

5─────────────── 

6`subprocess.run(cmd, capture_output=True, text=True, timeout=N)` is the 

7canonical way to read a child process' stdout with a time limit. On 

8Windows it has a latent failure mode that becomes load-bearing for 

9nunba's pytest runs and first-boot probes: when the child is killed 

10mid-initialization (e.g. nvidia-smi during driver probe, wmic on a 

11cold WMI repository, sysctl on a locked macOS kernel), Python's two 

12`_readerthread` daemons stay blocked in `fh.read()` because 

13`Popen.kill()` does NOT close stdout/stderr pipes on the parent side. 

14`subprocess.run`'s timeout handler then calls `communicate()` to drain 

15them, which joins those orphaned readers → the entire call wedges for 

16minutes (observed: 27 min wmic hang 2026-04-15; 5+ min nvidia-smi hang 

17during tests/journey/ setup). 

18 

19CLAUDE.md Gate 7 already bans `os.popen` and `subprocess.run` without 

20a timeout; this module closes the adjacent hole where the timeout 

21fires but the reader-thread cleanup still hangs. 

22 

23THE FIX 

24─────── 

25Drive Popen directly. On TimeoutExpired, kill() then **explicitly 

26close** the parent-side pipe handles so any still-running reader 

27thread unblocks and exits; finally `wait()` briefly to reap. 

28 

29Always returns a `BoundedResult` — never raises TimeoutExpired. 

30`FileNotFoundError` propagates (caller decides "tool missing" vs 

31"tool failed"), matching the semantics of the subprocess.run calls 

32this replaces. 

33 

34WHO CALLS IT 

35──────────── 

36- integrations/service_tools/vram_manager.py (nvidia-smi, rocm-smi) 

37- security/system_requirements.py (_detect_camera_hw vcgencmd, 

38 _detect_ram_gb sysctl fallback) 

39 

40For new callers: use `run_bounded()` from this module for any 

41external-tool probe where the child can block on init. Do NOT add 

42fresh `subprocess.run(..., capture_output=True, text=True, timeout=N)` 

43sites — they reintroduce the reader-thread orphan. 

44""" 

45from __future__ import annotations 

46 

47import logging 

48import subprocess 

49import sys 

50from typing import List, Optional, Sequence 

51 

# Module-level logger named after this module's import path; used by
# _safe_kill_and_close() to report timed-out children.
logger = logging.getLogger(__name__)

53 

54 

def hidden_popen_kwargs() -> dict:
    """Build the Popen keyword arguments that suppress the console window.

    Intended for ``subprocess.run`` / ``subprocess.Popen`` /
    ``subprocess.call`` sites that launch a console binary (where, ping,
    nvidia-smi, git, npm, ...) from a Windows GUI process with no parent
    console, where each call would otherwise flash a console window.

    Example::

        from core.subprocess_safe import hidden_popen_kwargs
        kw = hidden_popen_kwargs()
        proc = subprocess.run(cmd, capture_output=True, text=True, **kw)
        # or
        proc = subprocess.Popen(cmd, stdout=..., stderr=..., **kw)

    Returns:
        On Windows, a dict with ``startupinfo`` (a STARTUPINFO whose
        ``wShowWindow`` is 0 and whose ``STARTF_USESHOWWINDOW`` flag is
        set) and ``creationflags`` (``CREATE_NO_WINDOW``). On every other
        platform, an empty dict, so ``**kw`` merging is a no-op.

    A fresh STARTUPINFO is created on each call. ``run_bounded()`` in
    this module sets the same two options inline.
    """
    # Keep the platform test in this exact form so static type checkers
    # can narrow away the Windows-only subprocess attributes below.
    if sys.platform != "win32":
        return {}

    startup = subprocess.STARTUPINFO()
    # wShowWindow is only honoured when STARTF_USESHOWWINDOW is set.
    startup.wShowWindow = 0
    startup.dwFlags |= subprocess.STARTF_USESHOWWINDOW

    hidden = {}
    hidden["startupinfo"] = startup
    hidden["creationflags"] = subprocess.CREATE_NO_WINDOW
    return hidden

86 

87 

class BoundedResult:
    """Minimal CompletedProcess-shaped result.

    Attributes:
        returncode: value stored by the caller (``run_bounded`` stores
            the child's exit status, or -1 when it timed out).
        stdout: captured standard output as ``str``.
        stderr: captured standard error as ``str``.
        timed_out: True when the child was killed by the watchdog.

    ``__slots__`` keeps instances free of a per-instance ``__dict__``,
    so no attributes beyond the four above can be attached.
    """
    __slots__ = ("returncode", "stdout", "stderr", "timed_out")

    def __init__(
        self,
        returncode: int,
        stdout: str,
        stderr: str,
        timed_out: bool = False,
    ) -> None:
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
        self.timed_out = timed_out

    def __repr__(self) -> str:
        # Without this, log lines and assertion failures only show
        # "<BoundedResult object at 0x...>", which hides the outcome.
        return (
            f"{type(self).__name__}("
            f"returncode={self.returncode!r}, "
            f"stdout={self.stdout!r}, "
            f"stderr={self.stderr!r}, "
            f"timed_out={self.timed_out!r})"
        )

107 

108 

def run_bounded(
    cmd: Sequence[str],
    timeout: float = 5.0,
    *,
    wait_after_kill: float = 2.0,
) -> BoundedResult:
    """Run `cmd` with a hard timeout and reader-thread-safe cleanup.

    Unlike ``subprocess.run(..., capture_output=True, text=True,
    timeout=N)``, this helper hands a timed-out child to
    ``_safe_kill_and_close``, which kills it and explicitly closes the
    parent-side stdout / stderr pipes instead of draining them with a
    second, unbounded ``communicate()``.

    Output is decoded in text mode with ``errors="replace"``: a tool
    that emits bytes invalid in the default encoding yields U+FFFD
    replacement characters rather than an exception, so a probe still
    gets a ``BoundedResult`` back.

    Args:
        cmd: argv list — never a shell string.
        timeout: seconds to wait for the child's natural exit.
        wait_after_kill: seconds to wait for proc cleanup after kill()
            before giving up on reaping the child.

    Returns:
        BoundedResult with .returncode, .stdout, .stderr, .timed_out.
        On timeout: returncode=-1, timed_out=True, output fields empty.

    Raises:
        FileNotFoundError: cmd[0] not on PATH (caller handles).
        OSError: other Popen spawn failure (caller handles).
    """
    popen_kwargs: dict = {
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "stdin": subprocess.DEVNULL,
        "text": True,
        # Strict decoding would raise on undecodable tool output;
        # replacement characters keep the "returns a result" contract.
        "errors": "replace",
    }
    if sys.platform == "win32":
        # Hide the console window a console child would otherwise open.
        si = subprocess.STARTUPINFO()
        si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        si.wShowWindow = 0
        popen_kwargs["startupinfo"] = si
        popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW

    argv = list(cmd)

    # FileNotFoundError / OSError from Popen propagate — callers that
    # already do `except FileNotFoundError: pass` still work unchanged.
    proc = subprocess.Popen(argv, **popen_kwargs)

    try:
        stdout, stderr = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        _safe_kill_and_close(
            proc,
            argv[0] if argv else "<unknown>",
            wait_after_kill=wait_after_kill,
        )
        return BoundedResult(
            returncode=-1, stdout="", stderr="", timed_out=True,
        )
    except BaseException:
        # Any other unwind (KeyboardInterrupt, SystemExit, ...) must not
        # leave the child running; subprocess.run() kills here as well.
        try:
            proc.kill()
        except OSError:
            # Best effort only: never mask the exception being re-raised.
            pass
        raise

    return BoundedResult(
        returncode=proc.returncode,
        stdout=stdout or "",
        stderr=stderr or "",
        timed_out=False,
    )

168 

169 

def _safe_kill_and_close(
    proc: "subprocess.Popen[str]",
    cmd_name: str,
    *,
    wait_after_kill: float,
) -> None:
    """Kill proc, close pipes, bounded wait — no exception escapes.

    Steps, in order:
      1. log a warning naming the command;
      2. ``proc.kill()``;
      3. close ``proc.stdout`` / ``proc.stderr`` if they exist and are
         still open — per this module's rationale, this is the step
         meant to unblock reader threads stuck in ``fh.read()``;
      4. ``proc.wait(timeout=wait_after_kill)`` to reap the child.

    Every step is best-effort. A failure in one step is logged at DEBUG
    level with its traceback and does not stop the remaining steps.

    Args:
        proc: the already-started child process.
        cmd_name: label used in log messages only.
        wait_after_kill: seconds to wait for the child to exit after
            kill() before logging a warning and returning anyway.
    """
    logger.warning(
        "subprocess %s exceeded timeout; killing + closing pipes "
        "to unblock reader threads", cmd_name,
    )
    try:
        proc.kill()
    except Exception:
        # Keep going: the pipes still need closing even if kill failed.
        logger.debug(
            "subprocess %s: kill() failed", cmd_name, exc_info=True,
        )
    for fh in (proc.stdout, proc.stderr):
        if fh is None:
            continue
        try:
            if not fh.closed:
                fh.close()
        except Exception:
            logger.debug(
                "subprocess %s: closing pipe failed", cmd_name,
                exc_info=True,
            )
    try:
        proc.wait(timeout=wait_after_kill)
    except subprocess.TimeoutExpired:
        logger.warning(
            "subprocess %s did not exit within %.1fs after kill; "
            "leaving as zombie (OS will reap)",
            cmd_name, wait_after_kill,
        )
    except Exception:
        logger.debug(
            "subprocess %s: wait() after kill failed", cmd_name,
            exc_info=True,
        )

207 

208 

# Public API of this module. hidden_popen_kwargs is listed because its
# docstring documents it as an import target for other modules.
__all__ = ["BoundedResult", "hidden_popen_kwargs", "run_bounded"]