Skip to content

Commit 97e6d75

Browse files
committed
Add support for subprocesses in tachyon
1 parent 519bee4 commit 97e6d75

File tree

9 files changed

+2143
-57
lines changed

9 files changed

+2143
-57
lines changed
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
"""
2+
Child process monitoring for the sampling profiler.
3+
4+
This module monitors a target process for child process creation and spawns
5+
separate profiler instances for each discovered child.
6+
"""
7+
8+
import os
9+
import sys
10+
import subprocess
11+
import threading
12+
import time
13+
14+
import _remote_debugging
15+
16+
# Polling interval for child process discovery (100ms like Austin)
17+
_CHILD_POLL_INTERVAL_SEC = 0.1
18+
19+
# Default timeout (in seconds) for waiting on child profilers to exit
20+
_DEFAULT_WAIT_TIMEOUT = 30.0
21+
22+
# Maximum number of child profilers tracked at once (prevents resource exhaustion)
23+
_MAX_CHILD_PROFILERS = 100
24+
25+
# Interval for cleaning up completed profilers (in polling cycles)
26+
_CLEANUP_INTERVAL_CYCLES = 10
27+
28+
29+
def get_child_pids(pid, recursive=True):
    """
    Look up the child process IDs of a process.

    Thin wrapper that delegates to ``_remote_debugging.get_child_pids()``.

    Args:
        pid: Process ID of the parent process
        recursive: If True, return all descendants (children,
            grandchildren, etc.)

    Returns:
        List of child PIDs
    """
    child_pids = _remote_debugging.get_child_pids(pid, recursive=recursive)
    return child_pids
41+
42+
43+
def is_python_process(pid):
    """
    Decide whether the process with the given PID looks like Python.

    The check runs in two stages:
    1. Fast path (Linux only): resolve the /proc/{pid}/exe symlink and
       reject the process if the executable's base name does not
       contain 'python' (case-insensitive).
    2. Full probe: try to construct a RemoteUnwinder for the process.

    Args:
        pid: Process ID to check

    Returns:
        bool: True if the process appears to be a Python process,
        False otherwise
    """
    if sys.platform == "linux":
        try:
            exe_name = os.path.basename(os.readlink(f"/proc/{pid}/exe")).lower()
        except OSError:
            # The exe link is unreadable (PermissionError is an OSError
            # too); let the full probe below decide.
            exe_name = None
        if exe_name is not None and "python" not in exe_name:
            return False

    # Full probe: constructing a RemoteUnwinder will
    # 1. Search for the PyRuntime section in process memory maps
    # 2. Read and validate debug offsets
    # 3. Check Python version compatibility
    try:
        _remote_debugging.RemoteUnwinder(pid)
    except (OSError, RuntimeError, ValueError):
        # Not a Python process or not accessible
        return False
    return True
79+
80+
81+
class ChildProcessMonitor:
    """
    Monitors a target process for child processes and spawns profilers for them.

    While the context is active, a daemon thread polls the target's
    descendants every ``_CHILD_POLL_INTERVAL_SEC`` seconds.  Each newly
    seen PID that passes ``is_python_process()`` gets its own
    ``python -m profiling.sampling attach <pid>`` subprocess.

    Use as a context manager:
        with ChildProcessMonitor(pid, cli_args, output_pattern) as monitor:
            # monitoring runs here
            monitor.wait_for_profilers()  # optional: wait before cleanup
        # cleanup happens automatically
    """

    def __init__(self, pid, cli_args, output_pattern):
        """
        Initialize the child process monitor.

        Args:
            pid: Parent process ID to monitor
            cli_args: CLI arguments to pass to child profilers
            output_pattern: Pattern for output files (format string with {pid})
        """
        self.parent_pid = pid
        self.cli_args = cli_args
        self.output_pattern = output_pattern

        # PIDs that have already been examined.  Entries are never removed,
        # so a PID recycled by the OS is not examined a second time.
        self._known_children = set()
        # Popen objects of the profilers started by this monitor.
        self._spawned_profilers = []
        # Guards _known_children and _spawned_profilers.
        self._lock = threading.Lock()
        # Set by __exit__ to ask the polling thread to stop.
        self._stop_event = threading.Event()
        self._monitor_thread = None
        # Number of polling iterations so far; drives periodic cleanup.
        self._poll_count = 0

    def __enter__(self):
        """Start the background polling thread and return the monitor."""
        self._monitor_thread = threading.Thread(
            target=self._monitor_loop,
            daemon=True,
            name=f"child-monitor-{self.parent_pid}",
        )
        self._monitor_thread.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Stop monitoring, wait for child profilers, then terminate leftovers.

        Returns False so an exception raised inside the ``with`` body is
        never suppressed.
        """
        self._stop_event.set()
        if self._monitor_thread is not None:
            self._monitor_thread.join(timeout=2.0)
            if self._monitor_thread.is_alive():
                print("Warning: Monitor thread did not stop cleanly", file=sys.stderr)

        # Wait for child profilers to complete naturally
        self.wait_for_profilers()

        # Terminate any remaining profilers
        with self._lock:
            profilers_to_cleanup = list(self._spawned_profilers)
            self._spawned_profilers.clear()

        for proc in profilers_to_cleanup:
            self._cleanup_process(proc)
        return False

    def _cleanup_process(self, proc, terminate_timeout=2.0, kill_timeout=1.0):
        """
        Stop a profiler subprocess and reap it.

        Escalates from terminate() to kill() when the process does not
        exit within the given timeouts.

        Args:
            proc: The subprocess.Popen object to stop
            terminate_timeout: Seconds to wait after terminate()
            kill_timeout: Seconds to wait after kill()
        """
        if proc.poll() is not None:
            return  # Already terminated

        proc.terminate()
        try:
            proc.wait(timeout=terminate_timeout)
        except subprocess.TimeoutExpired:
            proc.kill()
            try:
                proc.wait(timeout=kill_timeout)
            except subprocess.TimeoutExpired:
                # Last resort: wait indefinitely to avoid zombie
                # SIGKILL should always work, but we must reap the process
                try:
                    proc.wait()
                except Exception:
                    # Deliberate best effort: nothing more can be done here
                    pass

    @property
    def spawned_profilers(self):
        """Snapshot (a new list) of the tracked profiler Popen objects."""
        with self._lock:
            return list(self._spawned_profilers)

    def wait_for_profilers(self, timeout=_DEFAULT_WAIT_TIMEOUT):
        """
        Wait for all spawned child profilers to complete.

        Call this before exiting the context if you want profilers to finish
        their work naturally rather than being terminated.  Profilers that
        are still running when the timeout expires are left running.

        Args:
            timeout: Maximum time to wait in seconds
        """
        # Only profilers that are still running need to be waited on; this
        # also keeps the message accurate when called more than once.
        profilers = [p for p in self.spawned_profilers if p.poll() is None]
        if not profilers:
            return

        print(f"Waiting for {len(profilers)} child profiler(s) to complete...",
              file=sys.stderr)

        # Monotonic clock: the deadline must not move when the wall clock
        # is adjusted.
        deadline = time.monotonic() + timeout
        for proc in profilers:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                proc.wait(timeout=max(0.1, remaining))
            except subprocess.TimeoutExpired:
                pass

    def _monitor_loop(self):
        """
        Poll for new child processes until asked to stop.

        Runs in the background thread.  Exits when the stop event is set
        or when the lookup raises ProcessLookupError.
        """
        # Note: There is an inherent TOCTOU race between discovering a child
        # process and checking if it's Python. This is expected for process monitoring.
        while not self._stop_event.is_set():
            try:
                self._poll_count += 1

                # Periodically clean up completed profilers to avoid memory buildup
                if self._poll_count % _CLEANUP_INTERVAL_CYCLES == 0:
                    self._cleanup_completed_profilers()

                children = set(get_child_pids(self.parent_pid, recursive=True))

                with self._lock:
                    new_children = children - self._known_children
                    self._known_children.update(new_children)
            except ProcessLookupError:
                # Parent process exited, stop monitoring
                break
            except Exception as e:
                # Log error but continue monitoring
                print(f"Warning: Error in child monitor loop: {e}", file=sys.stderr)
            else:
                # The whole batch is already recorded as known, so each child
                # is handled independently: one failure must not cause the
                # remaining new children to be skipped for good.
                for child_pid in new_children:
                    self._handle_new_child(child_pid)

            self._stop_event.wait(timeout=_CHILD_POLL_INTERVAL_SEC)

    def _handle_new_child(self, child_pid):
        """Spawn a profiler for one new child if it is Python; log any error."""
        try:
            # Only spawn profiler if this is actually a Python process
            if is_python_process(child_pid):
                self._spawn_profiler_for_child(child_pid)
        except Exception as e:
            print(f"Warning: Error handling child process {child_pid}: {e}",
                  file=sys.stderr)

    def _cleanup_completed_profilers(self):
        """Drop profilers that have already exited from the tracked list."""
        with self._lock:
            # Keep only profilers that are still running
            self._spawned_profilers = [
                p for p in self._spawned_profilers if p.poll() is None
            ]

    def _spawn_profiler_for_child(self, child_pid):
        """
        Start a profiler subprocess attached to ``child_pid``.

        Does nothing once the stop event is set or when the number of
        tracked profilers has reached ``_MAX_CHILD_PROFILERS``.  Failures to
        start the subprocess are reported on stderr and not raised.
        """
        if self._stop_event.is_set():
            return

        # Check if we've reached the maximum number of child profilers
        with self._lock:
            at_capacity = len(self._spawned_profilers) >= _MAX_CHILD_PROFILERS
        if at_capacity:
            print(f"Warning: Max child profilers ({_MAX_CHILD_PROFILERS}) reached, "
                  f"skipping PID {child_pid}", file=sys.stderr)
            return

        cmd = [
            sys.executable,
            "-m",
            "profiling.sampling",
            "attach",
            str(child_pid),
        ]
        cmd.extend(self._build_child_cli_args(child_pid))

        try:
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            print(f"Warning: Failed to start profiler for child {child_pid}: {e}",
                  file=sys.stderr)
            return

        # Decide under the lock whether the profiler is registered, so that
        # __exit__ either sees it in the list or this thread stops it.
        with self._lock:
            stopping = self._stop_event.is_set()
            if not stopping:
                self._spawned_profilers.append(proc)

        if stopping:
            # Terminate outside the lock: this can block for seconds and
            # other threads need the lock in the meantime.
            self._cleanup_process(proc, terminate_timeout=1.0, kill_timeout=1.0)
            return

        print(f"Started profiler for child process {child_pid}", file=sys.stderr)

    def _build_child_cli_args(self, child_pid):
        """
        Build the CLI argument list for one child's profiler.

        Starts from a copy of ``self.cli_args``.  When an output pattern is
        set, the value following the first ``-o``/``--output`` flag is
        replaced with the pattern formatted for ``child_pid``; if no such
        flag (with a value after it) exists, ``-o <file>`` is appended.

        Args:
            child_pid: PID substituted for ``{pid}`` in the output pattern

        Returns:
            A new list of arguments; ``self.cli_args`` is not modified
        """
        args = list(self.cli_args)

        if self.output_pattern:
            output_file = self.output_pattern.format(pid=child_pid)
            found_output = False
            for i, arg in enumerate(args):
                if arg in ("-o", "--output") and i + 1 < len(args):
                    args[i + 1] = output_file
                    found_output = True
                    break
            if not found_output:
                args.extend(["-o", output_file])

        return args

0 commit comments

Comments
 (0)