Coverage for core / subprocess_safe.py: 37.5%
48 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-12 04:49 +0000
1"""
core.subprocess_safe — bounded external-command execution.

WHY THIS EXISTS
───────────────
`subprocess.run(cmd, capture_output=True, text=True, timeout=N)` is the
canonical way to read a child process's stdout with a time limit.  On
Windows it has a latent failure mode that becomes load-bearing for
nunba's pytest runs and first-boot probes: when the child is killed
mid-initialization (e.g. nvidia-smi during driver probe, wmic on a
cold WMI repository, sysctl on a locked macOS kernel), Python's two
`_readerthread` daemons stay blocked in `fh.read()` because
`Popen.kill()` does NOT close stdout/stderr pipes on the parent side.
`subprocess.run`'s timeout handler then calls `communicate()` to drain
them, which joins those orphaned readers → the entire call wedges for
minutes (observed: 27 min wmic hang 2026-04-15; 5+ min nvidia-smi hang
during tests/journey/ setup).

CLAUDE.md Gate 7 already bans `os.popen` and `subprocess.run` without
a timeout; this module closes the adjacent hole where the timeout
fires but the reader-thread cleanup still hangs.

THE FIX
───────
Drive Popen directly.  On TimeoutExpired, kill() then **explicitly
close** the parent-side pipe handles so any still-running reader
thread unblocks and exits; finally `wait()` briefly to reap.

Always returns a `BoundedResult` — never raises TimeoutExpired.
`FileNotFoundError` propagates (caller decides "tool missing" vs
"tool failed"), matching the semantics of the subprocess.run calls
this replaces.

WHO CALLS IT
────────────
- integrations/service_tools/vram_manager.py  (nvidia-smi, rocm-smi)
- security/system_requirements.py  (_detect_camera_hw vcgencmd,
                                    _detect_ram_gb sysctl fallback)

For new callers: use `run_bounded()` from this module for any
external-tool probe where the child can block on init.  Do NOT add
fresh `subprocess.run(..., capture_output=True, text=True, timeout=N)`
sites — they reintroduce the reader-thread orphan.
44"""
45from __future__ import annotations
47import logging
48import subprocess
49import sys
50from typing import List, Optional, Sequence
52logger = logging.getLogger(__name__)
def hidden_popen_kwargs() -> dict:
    """Build the Popen keyword arguments that suppress the console window.

    Intended for any subprocess.run / subprocess.Popen / subprocess.call
    site that:
      - launches a console binary (where, ping, nvidia-smi, git, npm,
        etc.); AND
      - is reached from the cx_Freeze Windows GUI (no parent console);
    without these flags a brief cmd.exe window flickers per call.

    Usage:
        from core.subprocess_safe import hidden_popen_kwargs
        kw = hidden_popen_kwargs()
        proc = subprocess.run(cmd, capture_output=True, text=True, **kw)
        # OR
        proc = subprocess.Popen(cmd, stdout=..., stderr=..., **kw)

    Returns:
        On Windows, a new dict with ``startupinfo`` and
        ``creationflags`` entries.  On every other platform, a new empty
        dict, so ``**kw`` merging is a cross-platform no-op.

    Mirrors Nunba's tts/_subprocess.py::hidden_startupinfo() but hands
    back a kwargs dict (more ergonomic for **kw merging).  run_bounded()
    in this module inlines the same flags.
    """
    if sys.platform == "win32":
        startup = subprocess.STARTUPINFO()
        # Tell CreateProcess to honour wShowWindow, then request a
        # hidden window (0 is SW_HIDE).
        startup.dwFlags = startup.dwFlags | subprocess.STARTF_USESHOWWINDOW
        startup.wShowWindow = 0
        return dict(
            startupinfo=startup,
            creationflags=subprocess.CREATE_NO_WINDOW,
        )
    return {}
class BoundedResult:
    """Minimal CompletedProcess-shaped result.

    Attributes:
        returncode: exit status stored by the caller.
        stdout: captured standard output as text.
        stderr: captured standard error as text.
        timed_out: True when the child was killed by the watchdog.

    ``__slots__`` keeps instances free of a per-instance ``__dict__``;
    no attributes beyond the four above can be attached.
    """

    __slots__ = ("returncode", "stdout", "stderr", "timed_out")

    def __init__(
        self,
        returncode: int,
        stdout: str,
        stderr: str,
        timed_out: bool = False,
    ) -> None:
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
        self.timed_out = timed_out

    def __repr__(self) -> str:
        # Without this, a logged or asserted result renders as an opaque
        # "<BoundedResult object at 0x...>", which hides the very fields
        # needed to diagnose a failed probe.
        return (
            f"{type(self).__name__}("
            f"returncode={self.returncode!r}, "
            f"stdout={self.stdout!r}, "
            f"stderr={self.stderr!r}, "
            f"timed_out={self.timed_out!r})"
        )
def run_bounded(
    cmd: Sequence[str],
    timeout: float = 5.0,
    *,
    wait_after_kill: float = 2.0,
) -> BoundedResult:
    """Run `cmd` with a hard timeout and reader-thread-safe cleanup.

    Unlike ``subprocess.run(..., capture_output=True, text=True,
    timeout=N)``, this helper explicitly closes the parent-side stdout
    / stderr pipes after killing a timed-out child, so that reader
    threads blocked on those pipes can unblock and exit instead of
    wedging the caller.

    Output is decoded as text with ``errors="replace"``: bytes that are
    not valid in the default encoding become U+FFFD instead of raising
    ``UnicodeDecodeError`` out of a probe.

    Args:
        cmd: argv list — never a shell string.
        timeout: seconds to wait for the child's natural exit.
        wait_after_kill: seconds to wait for proc cleanup after kill()
            before giving up and letting the OS reap a zombie.

    Returns:
        BoundedResult with .returncode, .stdout, .stderr, .timed_out.
        On timeout: returncode=-1, timed_out=True, output fields empty.

    Raises:
        FileNotFoundError: cmd[0] not on PATH (caller handles).
        OSError: other Popen spawn failure (caller handles).
    """
    argv = list(cmd)
    popen_kwargs: dict = {
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "stdin": subprocess.DEVNULL,
        "text": True,
        # Tool output is not guaranteed to be valid in the locale
        # encoding; substitute rather than crash the caller.
        "errors": "replace",
    }
    if sys.platform == "win32":
        # Same console-hiding flags as hidden_popen_kwargs(), inlined.
        si = subprocess.STARTUPINFO()
        si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        si.wShowWindow = 0
        popen_kwargs["startupinfo"] = si
        popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW

    # FileNotFoundError / OSError from Popen propagate — callers that
    # already do `except FileNotFoundError: pass` still work unchanged.
    proc = subprocess.Popen(argv, **popen_kwargs)

    try:
        stdout, stderr = proc.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        _safe_kill_and_close(
            proc,
            argv[0] if argv else "<unknown>",
            wait_after_kill=wait_after_kill,
        )
        return BoundedResult(
            returncode=-1, stdout="", stderr="", timed_out=True,
        )
    except BaseException:
        # Anything else (KeyboardInterrupt included) must not leave the
        # child running; subprocess.run() gives the same guarantee.
        proc.kill()
        raise

    return BoundedResult(
        returncode=proc.returncode,
        stdout=stdout or "",
        stderr=stderr or "",
        timed_out=False,
    )
def _safe_kill_and_close(
    proc: "subprocess.Popen[str]",
    cmd_name: str,
    *,
    wait_after_kill: float,
) -> None:
    """Kill proc, close pipes, bounded wait — no exception escapes.

    The explicit close() on stdout/stderr is the load-bearing step: it
    is meant to release the parent-side pipe handles so that reader
    threads still blocked in fh.read() after the child dies can return
    and exit, instead of wedging a later join().

    Every step is best-effort.  Failures are logged at DEBUG level with
    a traceback rather than silently discarded, so a cleanup problem can
    be diagnosed without changing the no-raise contract.

    Args:
        proc: the timed-out child process.
        cmd_name: label used in log messages (normally argv[0]).
        wait_after_kill: seconds to wait for the child to exit after
            kill() before giving up.
    """
    logger.warning(
        "subprocess %s exceeded timeout; killing + closing pipes "
        "to unblock reader threads", cmd_name,
    )
    try:
        proc.kill()
    except Exception:
        logger.debug(
            "subprocess %s: kill() failed during timeout cleanup",
            cmd_name, exc_info=True,
        )
    for fh in (proc.stdout, proc.stderr):
        if fh is None:
            continue
        try:
            if not fh.closed:
                fh.close()
        except Exception:
            logger.debug(
                "subprocess %s: closing a pipe failed during timeout "
                "cleanup", cmd_name, exc_info=True,
            )
    try:
        proc.wait(timeout=wait_after_kill)
    except subprocess.TimeoutExpired:
        logger.warning(
            "subprocess %s did not exit within %.1fs after kill; "
            "leaving as zombie (OS will reap)",
            cmd_name, wait_after_kill,
        )
    except Exception:
        logger.debug(
            "subprocess %s: wait() failed during timeout cleanup",
            cmd_name, exc_info=True,
        )
# Public API of this module.  hidden_popen_kwargs is documented as
# importable by callers, so it is exported alongside the runner and its
# result type; _safe_kill_and_close stays private.
__all__ = ["BoundedResult", "hidden_popen_kwargs", "run_bounded"]