diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index ac8798ff6129a0..e43081c22ce75b 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -113,6 +113,9 @@ struct _ts { /* Currently holds the GIL. Must be its own field to avoid data races */ int holds_gil; + /* Currently requesting the GIL */ + int gil_requested; + int _whence; /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 8e7cd16acffa48..f6d50bf5df7a9e 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets { uint64_t native_thread_id; uint64_t datastack_chunk; uint64_t status; + uint64_t holds_gil; + uint64_t gil_requested; } thread_state; // InterpreterFrame offset; @@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets { .native_thread_id = offsetof(PyThreadState, native_thread_id), \ .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ .status = offsetof(PyThreadState, _status), \ + .holds_gil = offsetof(PyThreadState, holds_gil), \ + .gil_requested = offsetof(PyThreadState, gil_requested), \ }, \ .interpreter_frame = { \ .size = sizeof(_PyInterpreterFrame), \ diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index b7a033ac0a6637..311933480cbf17 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,17 +1,13 @@ from abc import ABC, abstractmethod -# Enums are slow -THREAD_STATE_RUNNING = 0 -THREAD_STATE_IDLE = 1 -THREAD_STATE_GIL_WAIT = 2 -THREAD_STATE_UNKNOWN = 3 - -STATUS = { - THREAD_STATE_RUNNING: "running", - THREAD_STATE_IDLE: "idle", - THREAD_STATE_GIL_WAIT: "gil_wait", - THREAD_STATE_UNKNOWN: "unknown", -} +# Thread status flags +try: + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN +except ImportError: + # Fallback for tests or when module is not available + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) class Collector(ABC): @abstractmethod @@ -26,8 +22,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): """Iterate over all frame stacks from all interpreters and threads.""" for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: - if skip_idle and thread_info.status != THREAD_STATE_RUNNING: - continue + # skip_idle now means: skip if thread is not actively running + # A thread is "active" if it has the GIL OR is on CPU + if skip_idle: + status_flags = thread_info.status + has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) + on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) + if not (has_gil or on_cpu): + continue frames = thread_info.frame_info if frames: yield frames, thread_info.thread_id diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 548acbf24b7fd2..d21aa0ed60b855 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -1,9 +1,20 @@ +import itertools import json import os import platform +import sys +import threading import time -from .collector import Collector, THREAD_STATE_RUNNING +from .collector import Collector +try: + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED +except ImportError: + # Fallback if module not available (shouldn't happen in normal use) + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) # Categories matching Firefox Profiler expectations @@ -11,14 +22,20 @@ {"name": "Other", "color": "grey", "subcategories": ["Other"]}, {"name": "Python", "color": "yellow", "subcategories": ["Other"]}, {"name": "Native", "color": "blue", "subcategories": ["Other"]}, - {"name": "Idle", "color": "transparent", "subcategories": ["Other"]}, + {"name": "GC", "color": "orange", "subcategories": ["Other"]}, + {"name": "GIL", "color": "green", "subcategories": ["Other"]}, + {"name": "CPU", "color": "purple", "subcategories": ["Other"]}, + {"name": "Code Type", "color": "red", "subcategories": ["Other"]}, ] # Category indices CATEGORY_OTHER = 0 CATEGORY_PYTHON = 1 CATEGORY_NATIVE = 2 -CATEGORY_IDLE = 3 +CATEGORY_GC = 3 +CATEGORY_GIL = 4 +CATEGORY_CPU = 5 +CATEGORY_CODE_TYPE = 6 # Subcategory indices DEFAULT_SUBCATEGORY = 0 @@ -58,6 +75,43 @@ def __init__(self, *, skip_idle=False): self.last_sample_time = 0 self.interval = 1.0 # Will be calculated from actual sampling + # State tracking for interval markers (tid -> start_time) + self.has_gil_start = {} # Thread has the GIL + self.no_gil_start = {} # Thread doesn't have the GIL + self.on_cpu_start = {} # Thread is running on CPU + self.off_cpu_start = {} # Thread is off CPU + self.python_code_start = {} # Thread running Python code (has GIL) + self.native_code_start = {} # Thread running native code (on CPU without GIL) + self.gil_wait_start = {} # Thread waiting for GIL + + # GC event tracking: track if we're currently in a GC + self.potential_gc_start = None + + def _track_state_transition(self, tid, condition, active_dict, inactive_dict, + active_name, inactive_name, category, current_time): + """Track binary state transitions and emit markers. + + Args: + tid: Thread ID + condition: Whether the active state is true + active_dict: Dict tracking start time of active state + inactive_dict: Dict tracking start time of inactive state + active_name: Name for active state marker + inactive_name: Name for inactive state marker + category: Gecko category for the markers + current_time: Current timestamp + """ + if condition: + active_dict.setdefault(tid, current_time) + if tid in inactive_dict: + self._add_marker(tid, inactive_name, inactive_dict.pop(tid), + current_time, category) + else: + inactive_dict.setdefault(tid, current_time) + if tid in active_dict: + self._add_marker(tid, active_name, active_dict.pop(tid), + current_time, category) + def collect(self, stack_frames): """Collect a sample from stack frames.""" current_time = (time.time() * 1000) - self.start_time @@ -69,18 +123,16 @@ def collect(self, stack_frames): ) / self.sample_count self.last_sample_time = current_time + # GC Event Detection and process threads + gc_collecting = False + for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: - if ( - self.skip_idle - and thread_info.status != THREAD_STATE_RUNNING - ): - continue + # Track GC status + if thread_info.gc_collecting: + gc_collecting = True frames = thread_info.frame_info - if not frames: - continue - tid = thread_info.thread_id # Initialize thread if needed @@ -89,6 +141,61 @@ def collect(self, stack_frames): thread_data = self.threads[tid] + # Decode status flags + status_flags = thread_info.status + has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) + on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) + gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED) + + # Track GIL possession (Has GIL / No GIL) + self._track_state_transition( + tid, has_gil, self.has_gil_start, self.no_gil_start, + "Has GIL", "No GIL", CATEGORY_GIL, current_time + ) + + # Track CPU state (On CPU / Off CPU) + self._track_state_transition( + tid, on_cpu, self.on_cpu_start, self.off_cpu_start, + "On CPU", "Off CPU", CATEGORY_CPU, current_time + ) + + # Track code type (Python Code / Native Code) + if has_gil: + self._track_state_transition( + tid, True, self.python_code_start, self.native_code_start, + "Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time + ) + elif on_cpu: + self._track_state_transition( + tid, True, self.native_code_start, self.python_code_start, + "Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time + ) + else: + # Neither has GIL nor on CPU - end both if running + if tid in self.python_code_start: + self._add_marker(tid, "Python Code", self.python_code_start.pop(tid), + current_time, CATEGORY_CODE_TYPE) + if tid in self.native_code_start: + self._add_marker(tid, "Native Code", self.native_code_start.pop(tid), + current_time, CATEGORY_CODE_TYPE) + + # Track "Waiting for GIL" intervals (one-sided tracking) + if gil_requested: + self.gil_wait_start.setdefault(tid, current_time) + elif tid in self.gil_wait_start: + self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid), + current_time, CATEGORY_GIL) + + # Categorize: idle if neither has GIL nor on CPU + is_idle = not has_gil and not on_cpu + + # Skip idle threads if skip_idle is enabled + if self.skip_idle and is_idle: + continue + + if not frames: + continue + # Process the stack stack_index = self._process_stack(thread_data, frames) @@ -98,11 +205,21 @@ def collect(self, stack_frames): samples["time"].append(current_time) samples["eventDelay"].append(None) + # Handle GC event markers after processing all threads + if gc_collecting: + if self.potential_gc_start is None: + # Start of GC + self.potential_gc_start = current_time + else: + # End of GC + if self.potential_gc_start is not None: + self._add_gc_marker(self.potential_gc_start, current_time) + self.potential_gc_start = None + self.sample_count += 1 def _create_thread(self, tid): """Create a new thread structure with processed profile format.""" - import threading # Determine if this is the main thread try: @@ -181,7 +298,7 @@ def _create_thread(self, tid): "functionSize": [], "length": 0, }, - # Markers - processed format + # Markers - processed format (arrays) "markers": { "data": [], "name": [], @@ -215,6 +332,36 @@ def _intern_string(self, s): self.global_string_map[s] = idx return idx + def _add_marker(self, tid, name, start_time, end_time, category): + """Add an interval marker for a specific thread.""" + if tid not in self.threads: + return + + thread_data = self.threads[tid] + duration = end_time - start_time + + name_idx = self._intern_string(name) + markers = thread_data["markers"] + markers["name"].append(name_idx) + markers["startTime"].append(start_time) + markers["endTime"].append(end_time) + markers["phase"].append(1) # 1 = interval marker + markers["category"].append(category) + markers["data"].append({ + "type": name.replace(" ", ""), + "duration": duration, + "tid": tid + }) + + def _add_gc_marker(self, start_time, end_time): + """Add a GC Collecting event marker to the main thread (or first thread we see).""" + if not self.threads: + return + + # Add GC marker to the first thread (typically the main thread) + first_tid = next(iter(self.threads)) + self._add_marker(first_tid, "GC Collecting", start_time, end_time, CATEGORY_GC) + def _process_stack(self, thread_data, frames): """Process a stack and return the stack index.""" if not frames: @@ -383,15 +530,67 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno): frame_cache[frame_key] = frame_idx return frame_idx + def _finalize_markers(self): + """Close any open markers at the end of profiling.""" + end_time = self.last_sample_time + + # Close all open markers for each thread using a generic approach + marker_states = [ + (self.has_gil_start, "Has GIL", CATEGORY_GIL), + (self.no_gil_start, "No GIL", CATEGORY_GIL), + (self.on_cpu_start, "On CPU", CATEGORY_CPU), + (self.off_cpu_start, "Off CPU", CATEGORY_CPU), + (self.python_code_start, "Python Code", CATEGORY_CODE_TYPE), + (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE), + (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL), + ] + + for state_dict, marker_name, category in marker_states: + for tid in list(state_dict.keys()): + self._add_marker(tid, marker_name, state_dict[tid], end_time, category) + del state_dict[tid] + + # Close any open GC marker + if self.potential_gc_start is not None: + self._add_gc_marker(self.potential_gc_start, end_time) + self.potential_gc_start = None + def export(self, filename): """Export the profile to a Gecko JSON file.""" + if self.sample_count > 0 and self.last_sample_time > 0: self.interval = self.last_sample_time / self.sample_count - profile = self._build_profile() + # Spinner for progress indication + spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']) + stop_spinner = threading.Event() + + def spin(): + message = 'Building Gecko profile...' + while not stop_spinner.is_set(): + sys.stderr.write(f'\r{next(spinner)} {message}') + sys.stderr.flush() + time.sleep(0.1) + # Clear the spinner line + sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r') + sys.stderr.flush() + + spinner_thread = threading.Thread(target=spin, daemon=True) + spinner_thread.start() + + try: + # Finalize any open markers before building profile + self._finalize_markers() + + profile = self._build_profile() - with open(filename, "w") as f: - json.dump(profile, f, separators=(",", ":")) + with open(filename, "w") as f: + json.dump(profile, f, separators=(",", ":")) + finally: + stop_spinner.set() + spinner_thread.join(timeout=1.0) + # Small delay to ensure the clear happens + time.sleep(0.01) print(f"Gecko profile written to {filename}") print( @@ -416,6 +615,7 @@ def _build_profile(self): frame_table["length"] = len(frame_table["func"]) func_table["length"] = len(func_table["name"]) resource_table["length"] = len(resource_table["name"]) + thread_data["markers"]["length"] = len(thread_data["markers"]["name"]) # Clean up internal caches del thread_data["_stackCache"] diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index e0d4583f0a1aec..5cea5ef184eaff 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -21,6 +21,7 @@ PROFILING_MODE_WALL = 0 PROFILING_MODE_CPU = 1 PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks def _parse_mode(mode_string): @@ -136,18 +137,20 @@ def _run_with_sync(original_cmd): class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads, mode=mode + self.pid, all_threads=self.all_threads, mode=mode, + skip_non_matching_threads=skip_non_matching_threads ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, only_active_thread=only_active_threads, mode=mode + self.pid, only_active_thread=only_active_threads, mode=mode, + skip_non_matching_threads=skip_non_matching_threads ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -614,14 +617,21 @@ def sample( realtime_stats=False, mode=PROFILING_MODE_WALL, ): + # PROFILING_MODE_ALL implies no skipping at all + if mode == PROFILING_MODE_ALL: + skip_non_matching_threads = False + skip_idle = False + else: + # Determine skip settings based on output format and mode + skip_non_matching_threads = output_format != "gecko" + skip_idle = mode != PROFILING_MODE_WALL + profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, mode=mode + pid, sample_interval_usec, all_threads=all_threads, mode=mode, + skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats - # Determine skip_idle for collector compatibility - skip_idle = mode != PROFILING_MODE_WALL - collector = None match output_format: case "pstats": @@ -633,7 +643,8 @@ def sample( collector = FlamegraphCollector(skip_idle=skip_idle) filename = filename or f"flamegraph.{pid}.html" case "gecko": - collector = GeckoCollector(skip_idle=skip_idle) + # Gecko format never skips idle threads to show full thread states + collector = GeckoCollector(skip_idle=False) filename = filename or f"gecko.{pid}.json" case _: raise ValueError(f"Invalid output format: {output_format}") @@ -877,6 +888,10 @@ def main(): if args.format in ("collapsed", "gecko"): _validate_collapsed_format_args(args, parser) + # Validate that --mode is not used with --gecko + if args.format == "gecko" and args.mode != "wall": + parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).") + sort_value = args.sort if args.sort is not None else 2 if args.module is not None and not args.module: @@ -895,7 +910,11 @@ def main(): elif target_count > 1: parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") - mode = _parse_mode(args.mode) + # Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice + if args.format == "gecko": + mode = PROFILING_MODE_ALL + else: + mode = _parse_mode(args.mode) if args.pid: sample( diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 01720457e61f5c..60e5000cd72a32 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -23,6 +23,12 @@ PROFILING_MODE_WALL = 0 PROFILING_MODE_CPU = 1 PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 + +# Thread status flags +THREAD_STATUS_HAS_GIL = (1 << 0) +THREAD_STATUS_ON_CPU = (1 << 1) +THREAD_STATUS_UNKNOWN = (1 << 2) try: from concurrent import interpreters @@ -1763,11 +1769,14 @@ def busy(): for thread_info in interpreter_info.threads: statuses[thread_info.thread_id] = thread_info.status - # Check if sleeper thread is idle and busy thread is running + # Check if sleeper thread is off CPU and busy thread is on CPU + # In the new flags system: + # - sleeper should NOT have ON_CPU flag (off CPU) + # - busy should have ON_CPU flag if (sleeper_tid in statuses and busy_tid in statuses and - statuses[sleeper_tid] == 1 and - statuses[busy_tid] == 0): + not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and + (statuses[busy_tid] & THREAD_STATUS_ON_CPU)): break time.sleep(0.5) # Give a bit of time to let threads settle except PermissionError: @@ -1779,8 +1788,8 @@ def busy(): self.assertIsNotNone(busy_tid, "Busy thread id not received") self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)") - self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU") finally: if client_socket is not None: @@ -1875,11 +1884,14 @@ def busy(): for thread_info in interpreter_info.threads: statuses[thread_info.thread_id] = thread_info.status - # Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running + # Check if sleeper thread doesn't have GIL and busy thread has GIL + # In the new flags system: + # - sleeper should NOT have HAS_GIL flag (waiting for GIL) + # - busy should have HAS_GIL flag if (sleeper_tid in statuses and busy_tid in statuses and - statuses[sleeper_tid] == 2 and - statuses[busy_tid] == 0): + not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and + (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): break time.sleep(0.5) # Give a bit of time to let threads settle except PermissionError: @@ -1891,8 +1903,8 @@ def busy(): self.assertIsNotNone(busy_tid, "Busy thread id not received") self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread should be idle (1)") - self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL") finally: if client_socket is not None: @@ -1900,6 +1912,128 @@ def busy(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + def test_thread_status_all_mode_detection(self): + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import socket + import threading + import time + import sys + + def sleeper_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"sleeper:" + str(threading.get_native_id()).encode()) + while True: + time.sleep(1) + + def busy_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"busy:" + str(threading.get_native_id()).encode()) + while True: + sum(range(100000)) + + t1 = threading.Thread(target=sleeper_thread) + t2 = threading.Thread(target=busy_thread) + t1.start() + t2.start() + t1.join() + t2.join() + """ + ) + + with os_helper.temp_dir() as tmp_dir: + script_file = make_script(tmp_dir, "script", script) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.listen(2) + server_socket.settimeout(SHORT_TIMEOUT) + + p = subprocess.Popen( + [sys.executable, script_file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + client_sockets = [] + try: + sleeper_tid = None + busy_tid = None + + # Receive thread IDs from the child process + for _ in range(2): + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + line = client_socket.recv(1024) + if line: + if line.startswith(b"sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except Exception: + pass + elif line.startswith(b"busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except Exception: + pass + + server_socket.close() + + attempts = 10 + statuses = {} + try: + unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL, + skip_non_matching_threads=False) + for _ in range(attempts): + traces = unwinder.get_stack_trace() + # Find threads and their statuses + statuses = {} + for interpreter_info in traces: + for thread_info in interpreter_info.threads: + statuses[thread_info.thread_id] = thread_info.status + + # Check ALL mode provides both GIL and CPU info + # - sleeper should NOT have ON_CPU and NOT have HAS_GIL + # - busy should have ON_CPU and have HAS_GIL + if (sleeper_tid in statuses and + busy_tid in statuses and + not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and + not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and + (statuses[busy_tid] & THREAD_STATUS_ON_CPU) and + (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): + break + time.sleep(0.5) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") + self.assertIsNotNone(busy_tid, "Busy thread id not received") + self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") + self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") + + # Sleeper thread: off CPU, no GIL + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL") + + # Busy thread: on CPU, has GIL + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL") + + finally: + for client_socket in client_sockets: + client_socket.close() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) + p.stdout.close() + p.stderr.close() if __name__ == "__main__": diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index a1342cafff1f83..00694c551ed06d 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -56,12 +56,14 @@ def __repr__(self): class MockThreadInfo: """Mock ThreadInfo for testing since the real one isn't accessible.""" - def __init__(self, thread_id, frame_info): + def __init__(self, thread_id, frame_info, status=0, gc_collecting=False): # Default to THREAD_STATE_RUNNING (0) self.thread_id = thread_id self.frame_info = frame_info + self.status = status + self.gc_collecting = gc_collecting def __repr__(self): - return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})" + return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info}, status={self.status}, gc_collecting={self.gc_collecting})" class MockInterpreterInfo: @@ -665,6 +667,97 @@ def test_gecko_collector_export(self): self.assertIn("func2", string_array) self.assertIn("other_func", string_array) + def test_gecko_collector_markers(self): + """Test Gecko profile markers for GIL and CPU state tracking.""" + try: + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED + except ImportError: + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) + + collector = GeckoCollector() + + # Status combinations for different thread states + HAS_GIL_ON_CPU = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Running Python code + NO_GIL_ON_CPU = THREAD_STATUS_ON_CPU # Running native code + WAITING_FOR_GIL = THREAD_STATUS_GIL_REQUESTED # Waiting for GIL + + # Simulate thread state transitions + collector.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [("test.py", 10, "python_func")], status=HAS_GIL_ON_CPU) + ]) + ]) + + collector.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [("test.py", 15, "wait_func")], status=WAITING_FOR_GIL) + ]) + ]) + + collector.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [("test.py", 20, "python_func2")], status=HAS_GIL_ON_CPU) + ]) + ]) + + collector.collect([ + MockInterpreterInfo(0, [ + MockThreadInfo(1, [("native.c", 100, "native_func")], status=NO_GIL_ON_CPU) + ]) + ]) + + profile_data = collector._build_profile() + + # Verify we have threads with markers + self.assertIn("threads", profile_data) + self.assertEqual(len(profile_data["threads"]), 1) + thread_data = profile_data["threads"][0] + + # Check markers exist + self.assertIn("markers", thread_data) + markers = thread_data["markers"] + + # Should have marker arrays + self.assertIn("name", markers) + self.assertIn("startTime", markers) + self.assertIn("endTime", markers) + self.assertIn("category", markers) + self.assertGreater(markers["length"], 0, "Should have generated markers") + + # Get marker names from string table + string_array = profile_data["shared"]["stringArray"] + marker_names = [string_array[idx] for idx in markers["name"]] + + # Verify we have different marker types + marker_name_set = set(marker_names) + + # Should have "Has GIL" markers (when thread had GIL) + self.assertIn("Has GIL", marker_name_set, "Should have 'Has GIL' markers") + + # Should have "No GIL" markers (when thread didn't have GIL) + self.assertIn("No GIL", marker_name_set, "Should have 'No GIL' markers") + + # Should have "On CPU" markers (when thread was on CPU) + self.assertIn("On CPU", marker_name_set, "Should have 'On CPU' markers") + + # Should have "Waiting for GIL" markers (when thread was waiting) + self.assertIn("Waiting for GIL", marker_name_set, "Should have 'Waiting for GIL' markers") + + # Verify marker structure + for i in range(markers["length"]): + # All markers should be interval markers (phase = 1) + self.assertEqual(markers["phase"][i], 1, f"Marker {i} should be interval marker") + + # All markers should have valid time range + start_time = markers["startTime"][i] + end_time = markers["endTime"][i] + self.assertLessEqual(start_time, end_time, f"Marker {i} should have valid time range") + + # All markers should have valid category + self.assertGreaterEqual(markers["category"][i], 0, f"Marker {i} should have valid category") + def test_pstats_collector_export(self): collector = PstatsCollector( sample_interval_usec=1000000 @@ -2598,19 +2691,30 @@ def test_mode_validation(self): def test_frames_filtered_with_skip_idle(self): """Test that frames are actually filtered when skip_idle=True.""" + # Import thread status flags + try: + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU + except ImportError: + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + # Create mock frames with different thread statuses class MockThreadInfoWithStatus: def __init__(self, thread_id, frame_info, status): self.thread_id = thread_id self.frame_info = frame_info self.status = status + self.gc_collecting = False + + # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread + ACTIVE_STATUS = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Has GIL and on CPU + IDLE_STATUS = 0 # Neither has GIL nor on CPU - # Create test data: running thread, idle thread, and another running thread test_frames = [ MockInterpreterInfo(0, [ - MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0), # RUNNING - MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1), # IDLE - MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0), # RUNNING + MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], ACTIVE_STATUS), + MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], IDLE_STATUS), + MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], ACTIVE_STATUS), ]) ] diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 5937d4892f550f..8b2478ca9d048c 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -11,6 +11,7 @@ * HEADERS AND INCLUDES * ============================================================================ */ +#include #include #include #include @@ -81,6 +82,8 @@ typedef enum _WIN32_THREADSTATE { #define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) #define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) #define SIZEOF_LONG_OBJ sizeof(PyLongObject) +#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state) +#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState) // Calculate the minimum buffer size needed to read interpreter state fields // We need to read code_object_generation and potentially tlbc_generation @@ -178,8 +181,9 @@ static PyStructSequence_Desc CoroInfo_desc = { // ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) static PyStructSequence_Field ThreadInfo_fields[] = { {"thread_id", "Thread ID"}, - {"status", "Thread status"}, + {"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"}, {"frame_info", "Frame information"}, + {"gc_collecting", "Whether GC is collecting (interpreter-level)"}, {NULL} }; @@ -187,7 +191,7 @@ static PyStructSequence_Desc ThreadInfo_desc = { "_remote_debugging.ThreadInfo", "Information about a thread", ThreadInfo_fields, - 2 + 3 }; // InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list) @@ -247,9 +251,16 @@ enum _ThreadState { enum _ProfilingMode { PROFILING_MODE_WALL = 0, PROFILING_MODE_CPU = 1, - PROFILING_MODE_GIL = 2 + PROFILING_MODE_GIL = 2, + PROFILING_MODE_ALL = 3 // Combines GIL + CPU checks }; +// Thread status flags (can be combined) +#define THREAD_STATUS_HAS_GIL (1 << 0) // Thread has the GIL +#define THREAD_STATUS_ON_CPU (1 << 1) // Thread is running on CPU +#define THREAD_STATUS_UNKNOWN (1 << 2) // Status could not be determined +#define THREAD_STATUS_GIL_REQUESTED (1 << 3) // Thread is waiting for the GIL + typedef struct { PyObject_HEAD proc_handle_t handle; @@ -2650,28 +2661,74 @@ unwind_stack_for_thread( long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); - // Calculate thread status based on mode - int status = THREAD_STATE_UNKNOWN; - if (unwinder->mode == PROFILING_MODE_CPU) { - long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); - status = get_thread_status(unwinder, tid, pthread_id); - if (status == -1) { - PyErr_Print(); - PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status"); - goto error; - } - } else if (unwinder->mode == PROFILING_MODE_GIL) { + // Read GC collecting state from the interpreter (before any skip checks) + uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp); + + // Read the GC runtime state from the interpreter state + uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc; + char gc_state[SIZEOF_GC_RUNTIME_STATE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state"); + goto error; + } + + int gc_collecting = GET_MEMBER(int, gc_state, unwinder->debug_offsets.gc.collecting); + + // Calculate thread status using flags (always) + int status_flags = 0; + + // Check GIL status + int has_gil = 0; + int gil_requested = 0; #ifdef Py_GIL_DISABLED - // All threads are considered running in free threading builds if they have a thread state attached - int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; - status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; + int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; + has_gil = active; #else - status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; + // Read holds_gil directly from thread state + has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil); + + // Check if thread is actively requesting the GIL + if (unwinder->debug_offsets.thread_state.gil_requested != 0) { + gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested); + } + + // Set GIL_REQUESTED flag if thread is waiting + if (!has_gil && gil_requested) { + status_flags |= THREAD_STATUS_GIL_REQUESTED; + } #endif - } else { - // PROFILING_MODE_WALL - all threads are considered running - status = THREAD_STATE_RUNNING; + if (has_gil) { + status_flags |= THREAD_STATUS_HAS_GIL; + } + + // Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time + // This would indicate a race condition in the GIL state tracking + assert(!(has_gil && gil_requested)); + + // Check CPU status + long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); + int cpu_status = get_thread_status(unwinder, tid, pthread_id); + if (cpu_status == -1) { + status_flags |= THREAD_STATUS_UNKNOWN; + } else if (cpu_status == THREAD_STATE_RUNNING) { + status_flags |= THREAD_STATUS_ON_CPU; + } + + // Determine if we should skip based on mode + int status = THREAD_STATE_RUNNING; // Default for skip logic + if (unwinder->mode == PROFILING_MODE_CPU) { + status = cpu_status; + } else if (unwinder->mode == PROFILING_MODE_GIL) { + status = has_gil ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT; + } else if (unwinder->mode == PROFILING_MODE_ALL) { + // In ALL mode, considered running if has GIL or on CPU + if (has_gil || (status_flags & THREAD_STATUS_ON_CPU)) { + status = THREAD_STATE_RUNNING; + } else { + status = THREAD_STATE_IDLE; + } } + // PROFILING_MODE_WALL defaults to THREAD_STATE_RUNNING // Check if we should skip this thread based on mode int should_skip = 0; @@ -2719,16 +2776,25 @@ unwind_stack_for_thread( goto error; } - PyObject *py_status = PyLong_FromLong(status); + // Always use status_flags + PyObject *py_status = PyLong_FromLong(status_flags); if (py_status == NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status"); goto error; } - PyErr_Print(); + PyObject *py_gc_collecting = PyBool_FromLong(gc_collecting); + if (py_gc_collecting == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create gc_collecting"); + Py_DECREF(py_status); + goto error; + } + + // py_status contains status flags (bitfield) PyStructSequence_SetItem(result, 0, thread_id); PyStructSequence_SetItem(result, 1, py_status); // Steals reference PyStructSequence_SetItem(result, 2, frame_info); // Steals reference + PyStructSequence_SetItem(result, 3, py_gc_collecting); // Steals reference cleanup_stack_chunks(&chunks); return result; @@ -3401,6 +3467,21 @@ _remote_debugging_exec(PyObject *m) if (rc < 0) { return -1; } + + // Add thread status flag constants + if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) { + return -1; + } + if (RemoteDebugging_InitState(st) < 0) { return -1; } diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 6bf64868cbb2d3..ff429c797a4774 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -207,6 +207,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil) _Py_atomic_store_int_relaxed(&gil->locked, 0); if (tstate != NULL) { tstate->holds_gil = 0; + tstate->gil_requested = 0; } COND_SIGNAL(gil->cond); MUTEX_UNLOCK(gil->mutex); @@ -320,6 +321,8 @@ take_gil(PyThreadState *tstate) MUTEX_LOCK(gil->mutex); + tstate->gil_requested = 1; + int drop_requested = 0; while (_Py_atomic_load_int_relaxed(&gil->locked)) { unsigned long saved_switchnum = gil->switch_number; @@ -407,6 +410,7 @@ take_gil(PyThreadState *tstate) } assert(_PyThreadState_CheckConsistency(tstate)); + tstate->gil_requested = 0; tstate->holds_gil = 1; _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); update_eval_breaker_for_thread(interp, tstate);