Skip to content

Commit ef51a7c

Browse files
authored
gh-138122: Make sampling profiler integration tests more resilient (#142382)
The tests were flaky on slow machines because subprocesses could finish before enough samples were collected. This adds synchronization similar to test_external_inspection: test scripts now signal when they start working, and the profiler waits for this signal before sampling. Test scripts now run in infinite loops until killed rather than for fixed iterations, ensuring the profiler always has active work to sample regardless of machine speed.
1 parent ff2577f commit ef51a7c

File tree

4 files changed

+185
-126
lines changed

4 files changed

+185
-126
lines changed

Lib/test/test_profiling/test_sampling_profiler/helpers.py

Lines changed: 87 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,88 @@
3838
SubprocessInfo = namedtuple("SubprocessInfo", ["process", "socket"])
3939

4040

41+
def _wait_for_signal(sock, expected_signals, timeout=SHORT_TIMEOUT):
42+
"""
43+
Wait for expected signal(s) from a socket with proper timeout and EOF handling.
44+
45+
Args:
46+
sock: Connected socket to read from
47+
expected_signals: Single bytes object or list of bytes objects to wait for
48+
timeout: Socket timeout in seconds
49+
50+
Returns:
51+
bytes: Complete accumulated response buffer
52+
53+
Raises:
54+
RuntimeError: If connection closed before signal received or timeout
55+
"""
56+
if isinstance(expected_signals, bytes):
57+
expected_signals = [expected_signals]
58+
59+
sock.settimeout(timeout)
60+
buffer = b""
61+
62+
while True:
63+
# Check if all expected signals are in buffer
64+
if all(sig in buffer for sig in expected_signals):
65+
return buffer
66+
67+
try:
68+
chunk = sock.recv(4096)
69+
if not chunk:
70+
raise RuntimeError(
71+
f"Connection closed before receiving expected signals. "
72+
f"Expected: {expected_signals}, Got: {buffer[-200:]!r}"
73+
)
74+
buffer += chunk
75+
except socket.timeout:
76+
raise RuntimeError(
77+
f"Timeout waiting for signals. "
78+
f"Expected: {expected_signals}, Got: {buffer[-200:]!r}"
79+
) from None
80+
except OSError as e:
81+
raise RuntimeError(
82+
f"Socket error while waiting for signals: {e}. "
83+
f"Expected: {expected_signals}, Got: {buffer[-200:]!r}"
84+
) from None
85+
86+
87+
def _cleanup_sockets(*sockets):
88+
"""Safely close multiple sockets, ignoring errors."""
89+
for sock in sockets:
90+
if sock is not None:
91+
try:
92+
sock.close()
93+
except OSError:
94+
pass
95+
96+
97+
def _cleanup_process(proc, timeout=SHORT_TIMEOUT):
98+
"""Terminate a process gracefully, escalating to kill if needed."""
99+
if proc.poll() is not None:
100+
return
101+
proc.terminate()
102+
try:
103+
proc.wait(timeout=timeout)
104+
return
105+
except subprocess.TimeoutExpired:
106+
pass
107+
proc.kill()
108+
try:
109+
proc.wait(timeout=timeout)
110+
except subprocess.TimeoutExpired:
111+
pass # Process refuses to die, nothing more we can do
112+
113+
41114
@contextlib.contextmanager
42-
def test_subprocess(script):
115+
def test_subprocess(script, wait_for_working=False):
43116
"""Context manager to create a test subprocess with socket synchronization.
44117
45118
Args:
46-
script: Python code to execute in the subprocess
119+
script: Python code to execute in the subprocess. If wait_for_working
120+
is True, script should send b"working" after starting work.
121+
wait_for_working: If True, wait for both "ready" and "working" signals.
122+
Default False for backward compatibility.
47123
48124
Yields:
49125
SubprocessInfo: Named tuple with process and socket objects
@@ -80,19 +156,18 @@ def test_subprocess(script):
80156
# Wait for process to connect and send ready signal
81157
client_socket, _ = server_socket.accept()
82158
server_socket.close()
83-
response = client_socket.recv(1024)
84-
if response != b"ready":
85-
raise RuntimeError(
86-
f"Unexpected response from subprocess: {response!r}"
87-
)
159+
server_socket = None
160+
161+
# Wait for ready signal, and optionally working signal
162+
if wait_for_working:
163+
_wait_for_signal(client_socket, [b"ready", b"working"])
164+
else:
165+
_wait_for_signal(client_socket, b"ready")
88166

89167
yield SubprocessInfo(proc, client_socket)
90168
finally:
91-
if client_socket is not None:
92-
client_socket.close()
93-
if proc.poll() is None:
94-
proc.kill()
95-
proc.wait()
169+
_cleanup_sockets(client_socket, server_socket)
170+
_cleanup_process(proc)
96171

97172

98173
def close_and_unlink(file):

Lib/test/test_profiling/test_sampling_profiler/test_advanced.py

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,32 +39,26 @@ def setUpClass(cls):
3939
import gc
4040
4141
class ExpensiveGarbage:
42-
"""Class that triggers GC with expensive finalizer (callback)."""
4342
def __init__(self):
4443
self.cycle = self
4544
4645
def __del__(self):
47-
# CPU-intensive work in the finalizer callback
4846
result = 0
4947
for i in range(100000):
5048
result += i * i
5149
if i % 1000 == 0:
5250
result = result % 1000000
5351
54-
def main_loop():
55-
"""Main loop that triggers GC with expensive callback."""
56-
while True:
57-
ExpensiveGarbage()
58-
gc.collect()
59-
60-
if __name__ == "__main__":
61-
main_loop()
52+
_test_sock.sendall(b"working")
53+
while True:
54+
ExpensiveGarbage()
55+
gc.collect()
6256
'''
6357

6458
def test_gc_frames_enabled(self):
6559
"""Test that GC frames appear when gc tracking is enabled."""
6660
with (
67-
test_subprocess(self.gc_test_script) as subproc,
61+
test_subprocess(self.gc_test_script, wait_for_working=True) as subproc,
6862
io.StringIO() as captured_output,
6963
mock.patch("sys.stdout", captured_output),
7064
):
@@ -94,7 +88,7 @@ def test_gc_frames_enabled(self):
9488
def test_gc_frames_disabled(self):
9589
"""Test that GC frames do not appear when gc tracking is disabled."""
9690
with (
97-
test_subprocess(self.gc_test_script) as subproc,
91+
test_subprocess(self.gc_test_script, wait_for_working=True) as subproc,
9892
io.StringIO() as captured_output,
9993
mock.patch("sys.stdout", captured_output),
10094
):
@@ -133,18 +127,13 @@ def setUpClass(cls):
133127
cls.native_test_script = """
134128
import operator
135129
136-
def main_loop():
137-
while True:
138-
# Native code in the middle of the stack:
139-
operator.call(inner)
140-
141130
def inner():
142-
# Python code at the top of the stack:
143131
for _ in range(1_000_0000):
144132
pass
145133
146-
if __name__ == "__main__":
147-
main_loop()
134+
_test_sock.sendall(b"working")
135+
while True:
136+
operator.call(inner)
148137
"""
149138

150139
def test_native_frames_enabled(self):
@@ -154,10 +143,7 @@ def test_native_frames_enabled(self):
154143
)
155144
self.addCleanup(close_and_unlink, collapsed_file)
156145

157-
with (
158-
test_subprocess(self.native_test_script) as subproc,
159-
):
160-
# Suppress profiler output when testing file export
146+
with test_subprocess(self.native_test_script, wait_for_working=True) as subproc:
161147
with (
162148
io.StringIO() as captured_output,
163149
mock.patch("sys.stdout", captured_output),
@@ -199,7 +185,7 @@ def test_native_frames_enabled(self):
199185
def test_native_frames_disabled(self):
200186
"""Test that native frames do not appear when native tracking is disabled."""
201187
with (
202-
test_subprocess(self.native_test_script) as subproc,
188+
test_subprocess(self.native_test_script, wait_for_working=True) as subproc,
203189
io.StringIO() as captured_output,
204190
mock.patch("sys.stdout", captured_output),
205191
):

0 commit comments

Comments
 (0)