Skip to content

Commit e9ec2fa

Browse files
committed
gh-91048: Fix external inspection multi-threaded performance
1 parent 642e5df commit e9ec2fa

File tree

4 files changed

+222
-20
lines changed

4 files changed

+222
-20
lines changed

Lib/test/test_external_inspection.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
import socket
77
import threading
8+
import time
89
from asyncio import staggered, taskgroups, base_events, tasks
910
from unittest.mock import ANY
1011
from test.support import os_helper, SHORT_TIMEOUT, busy_retry
@@ -876,6 +877,122 @@ def test_self_trace(self):
876877
],
877878
)
878879

880+
@skip_if_not_supported
881+
@unittest.skipIf(
882+
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
883+
"Test only runs on Linux with process_vm_readv support",
884+
)
885+
def test_only_active_thread(self):
886+
# Test that only_active_thread parameter works correctly
887+
port = find_unused_port()
888+
script = textwrap.dedent(
889+
f"""\
890+
import time, sys, socket, threading
891+
892+
# Connect to the test process
893+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
894+
sock.connect(('localhost', {port}))
895+
896+
def worker_thread(name, barrier, ready_event):
897+
barrier.wait() # Synchronize thread start
898+
ready_event.wait() # Wait for main thread signal
899+
# Sleep to keep thread alive
900+
time.sleep(10_000)
901+
902+
def main_work():
903+
# Do busy work to hold the GIL
904+
count = 0
905+
while count < 100000000:
906+
count += 1
907+
if count % 10000000 == 0:
908+
pass # Keep main thread busy
909+
910+
# Create synchronization primitives
911+
num_threads = 3
912+
barrier = threading.Barrier(num_threads + 1) # +1 for main thread
913+
ready_event = threading.Event()
914+
915+
# Start worker threads
916+
threads = []
917+
for i in range(num_threads):
918+
t = threading.Thread(target=worker_thread, args=(f"Worker-{{i}}", barrier, ready_event))
919+
t.start()
920+
threads.append(t)
921+
922+
# Wait for all threads to be ready
923+
barrier.wait()
924+
925+
# Signal ready to parent process
926+
sock.sendall(b"ready\\n")
927+
928+
# Signal threads to start waiting
929+
ready_event.set()
930+
931+
# Give threads time to start sleeping
932+
time.sleep(0.1)
933+
934+
# Now do busy work to hold the GIL
935+
main_work()
936+
"""
937+
)
938+
939+
with os_helper.temp_dir() as work_dir:
940+
script_dir = os.path.join(work_dir, "script_pkg")
941+
os.mkdir(script_dir)
942+
943+
# Create a socket server to communicate with the target process
944+
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
945+
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
946+
server_socket.bind(("localhost", port))
947+
server_socket.settimeout(SHORT_TIMEOUT)
948+
server_socket.listen(1)
949+
950+
script_name = _make_test_script(script_dir, "script", script)
951+
client_socket = None
952+
try:
953+
p = subprocess.Popen([sys.executable, script_name])
954+
client_socket, _ = server_socket.accept()
955+
server_socket.close()
956+
957+
# Wait for ready signal
958+
response = b""
959+
while b"ready" not in response:
960+
response += client_socket.recv(1024)
961+
962+
# Give threads a moment to start their busy work
963+
time.sleep(0.1)
964+
965+
# Get stack trace with all threads
966+
unwinder_all = RemoteUnwinder(p.pid, all_threads=True)
967+
all_traces = unwinder_all.get_stack_trace()
968+
969+
# Get stack trace with only GIL holder
970+
unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True)
971+
gil_traces = unwinder_gil.get_stack_trace()
972+
973+
except PermissionError:
974+
self.skipTest(
975+
"Insufficient permissions to read the stack trace"
976+
)
977+
finally:
978+
if client_socket is not None:
979+
client_socket.close()
980+
p.kill()
981+
p.terminate()
982+
p.wait(timeout=SHORT_TIMEOUT)
983+
984+
# Verify we got multiple threads in all_traces
985+
self.assertGreater(len(all_traces), 1, "Should have multiple threads")
986+
987+
# Verify we got exactly one thread in gil_traces
988+
self.assertEqual(len(gil_traces), 1, "Should have exactly one GIL holder")
989+
990+
# The GIL holder should be in the all_traces list
991+
gil_thread_id = gil_traces[0][0]
992+
all_thread_ids = [trace[0] for trace in all_traces]
993+
self.assertIn(gil_thread_id, all_thread_ids,
994+
"GIL holder should be among all threads")
995+
879996

880997
if __name__ == "__main__":
881998
unittest.main()

Modules/_remote_debugging_module.c

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,14 @@
6464
#endif
6565

6666
#ifdef Py_GIL_DISABLED
67-
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68-
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
67+
#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68+
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
70+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7071
#else
71-
#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
72-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
72+
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
73+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
74+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7375
#endif
7476
#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
7577

@@ -206,6 +208,7 @@ typedef struct {
206208
uint64_t code_object_generation;
207209
_Py_hashtable_t *code_object_cache;
208210
int debug;
211+
int only_active_thread;
209212
RemoteDebuggingState *cached_state; // Cached module state
210213
#ifdef Py_GIL_DISABLED
211214
// TLBC cache invalidation tracking
@@ -2496,6 +2499,7 @@ _remote_debugging.RemoteUnwinder.__init__
24962499
pid: int
24972500
*
24982501
all_threads: bool = False
2502+
only_active_thread: bool = False
24992503
debug: bool = False
25002504
25012505
Initialize a new RemoteUnwinder object for debugging a remote Python process.
@@ -2504,6 +2508,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
25042508
pid: Process ID of the target Python process to debug
25052509
all_threads: If True, initialize state for all threads in the process.
25062510
If False, only initialize for the main thread.
2511+
only_active_thread: If True, only sample the thread holding the GIL.
2512+
Cannot be used together with all_threads=True.
25072513
debug: If True, chain exceptions to explain the sequence of events that
25082514
lead to the exception.
25092515
@@ -2514,15 +2520,25 @@ process, including examining thread states, stack frames and other runtime data.
25142520
PermissionError: If access to the target process is denied
25152521
OSError: If unable to attach to the target process or access its memory
25162522
RuntimeError: If unable to read debug information from the target process
2523+
ValueError: If both all_threads and only_active_thread are True
25172524
[clinic start generated code]*/
25182525

25192526
static int
25202527
_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
25212528
int pid, int all_threads,
2529+
int only_active_thread,
25222530
int debug)
2523-
/*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/
2531+
/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/
25242532
{
2533+
// Validate that all_threads and only_active_thread are not both True
2534+
if (all_threads && only_active_thread) {
2535+
PyErr_SetString(PyExc_ValueError,
2536+
"all_threads and only_active_thread cannot both be True");
2537+
return -1;
2538+
}
2539+
25252540
self->debug = debug;
2541+
self->only_active_thread = only_active_thread;
25262542
self->cached_state = NULL;
25272543
if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
25282544
set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
@@ -2602,13 +2618,18 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
26022618
@critical_section
26032619
_remote_debugging.RemoteUnwinder.get_stack_trace
26042620
2605-
Returns a list of stack traces for all threads in the target process.
2621+
Returns a list of stack traces for threads in the target process.
26062622
26072623
Each element in the returned list is a tuple of (thread_id, frame_list), where:
26082624
- thread_id is the OS thread identifier
26092625
- frame_list is a list of tuples (function_name, filename, line_number) representing
26102626
the Python stack frames for that thread, ordered from most recent to oldest
26112627
2628+
The threads returned depend on the initialization parameters:
2629+
- If only_active_thread was True: returns only the thread holding the GIL
2630+
- If all_threads was True: returns all threads
2631+
- Otherwise: returns only the main thread
2632+
26122633
Example:
26132634
[
26142635
(1234, [
@@ -2632,7 +2653,7 @@ Each element in the returned list is a tuple of (thread_id, frame_list), where:
26322653

26332654
static PyObject *
26342655
_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self)
2635-
/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/
2656+
/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/
26362657
{
26372658
PyObject* result = NULL;
26382659
// Read interpreter state into opaque buffer
@@ -2655,6 +2676,28 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26552676
_Py_hashtable_clear(self->code_object_cache);
26562677
}
26572678

2679+
// If only_active_thread is true, we need to determine which thread holds the GIL
2680+
PyThreadState* gil_holder = NULL;
2681+
if (self->only_active_thread) {
2682+
// The GIL state is already in interp_state_buffer, just read from there
2683+
// Check if GIL is locked
2684+
int gil_locked = GET_MEMBER(int, interp_state_buffer,
2685+
self->debug_offsets.interpreter_state.gil_runtime_state_locked);
2686+
2687+
if (gil_locked) {
2688+
// Get the last holder (current holder when GIL is locked)
2689+
gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer,
2690+
self->debug_offsets.interpreter_state.gil_runtime_state_holder);
2691+
} else {
2692+
// GIL is not locked, return empty list
2693+
result = PyList_New(0);
2694+
if (!result) {
2695+
set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list");
2696+
}
2697+
goto exit;
2698+
}
2699+
}
2700+
26582701
#ifdef Py_GIL_DISABLED
26592702
// Check TLBC generation and invalidate cache if needed
26602703
uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
@@ -2666,7 +2709,10 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26662709
#endif
26672710

26682711
uintptr_t current_tstate;
2669-
if (self->tstate_addr == 0) {
2712+
if (self->only_active_thread && gil_holder != NULL) {
2713+
// We have the GIL holder, process only that thread
2714+
current_tstate = (uintptr_t)gil_holder;
2715+
} else if (self->tstate_addr == 0) {
26702716
// Get threads head from buffer
26712717
current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
26722718
self->debug_offsets.interpreter_state.threads_head);
@@ -2700,6 +2746,11 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
27002746
if (self->tstate_addr) {
27012747
break;
27022748
}
2749+
2750+
// If we're only processing the GIL holder, we're done after one iteration
2751+
if (self->only_active_thread && gil_holder != NULL) {
2752+
break;
2753+
}
27032754
}
27042755

27052756
exit:

0 commit comments

Comments
 (0)