Skip to content

Commit 89a914c

Browse files
authored
pythongh-135953: Add GIL contention markers to sampling profiler Gecko format (python#139485)
This commit enhances the Gecko format reporter in the sampling profiler to include markers for GIL acquisition events.
1 parent 994ab5c commit 89a914c

File tree

9 files changed

+627
-82
lines changed

9 files changed

+627
-82
lines changed

Include/cpython/pystate.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ struct _ts {
113113
/* Currently holds the GIL. Must be its own field to avoid data races */
114114
int holds_gil;
115115

116+
/* Currently requesting the GIL */
117+
int gil_requested;
118+
116119
int _whence;
117120

118121
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).

Include/internal/pycore_debug_offsets.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
106106
uint64_t native_thread_id;
107107
uint64_t datastack_chunk;
108108
uint64_t status;
109+
uint64_t holds_gil;
110+
uint64_t gil_requested;
109111
} thread_state;
110112

111113
// InterpreterFrame offset;
@@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
273275
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
274276
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
275277
.status = offsetof(PyThreadState, _status), \
278+
.holds_gil = offsetof(PyThreadState, holds_gil), \
279+
.gil_requested = offsetof(PyThreadState, gil_requested), \
276280
}, \
277281
.interpreter_frame = { \
278282
.size = sizeof(_PyInterpreterFrame), \
Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
from abc import ABC, abstractmethod
22

3-
# Enums are slow
4-
THREAD_STATE_RUNNING = 0
5-
THREAD_STATE_IDLE = 1
6-
THREAD_STATE_GIL_WAIT = 2
7-
THREAD_STATE_UNKNOWN = 3
8-
9-
STATUS = {
10-
THREAD_STATE_RUNNING: "running",
11-
THREAD_STATE_IDLE: "idle",
12-
THREAD_STATE_GIL_WAIT: "gil_wait",
13-
THREAD_STATE_UNKNOWN: "unknown",
14-
}
3+
# Thread status flags
4+
try:
5+
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
6+
except ImportError:
7+
# Fallback for tests or when module is not available
8+
THREAD_STATUS_HAS_GIL = (1 << 0)
9+
THREAD_STATUS_ON_CPU = (1 << 1)
10+
THREAD_STATUS_UNKNOWN = (1 << 2)
11+
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
1512

1613
class Collector(ABC):
1714
@abstractmethod
@@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
2623
"""Iterate over all frame stacks from all interpreters and threads."""
2724
for interpreter_info in stack_frames:
2825
for thread_info in interpreter_info.threads:
29-
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
30-
continue
26+
# skip_idle now means: skip if thread is not actively running
27+
# A thread is "active" if it has the GIL OR is on CPU
28+
if skip_idle:
29+
status_flags = thread_info.status
30+
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
31+
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
32+
if not (has_gil or on_cpu):
33+
continue
3134
frames = thread_info.frame_info
3235
if frames:
3336
yield frames, thread_info.thread_id

Lib/profiling/sampling/gecko_collector.py

Lines changed: 220 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,41 @@
1+
import itertools
12
import json
23
import os
34
import platform
5+
import sys
6+
import threading
47
import time
58

6-
from .collector import Collector, THREAD_STATE_RUNNING
9+
from .collector import Collector
10+
try:
11+
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
12+
except ImportError:
13+
# Fallback if module not available (shouldn't happen in normal use)
14+
THREAD_STATUS_HAS_GIL = (1 << 0)
15+
THREAD_STATUS_ON_CPU = (1 << 1)
16+
THREAD_STATUS_UNKNOWN = (1 << 2)
17+
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
718

819

920
# Categories matching Firefox Profiler expectations
1021
GECKO_CATEGORIES = [
1122
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
1223
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
1324
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
14-
{"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
25+
{"name": "GC", "color": "orange", "subcategories": ["Other"]},
26+
{"name": "GIL", "color": "green", "subcategories": ["Other"]},
27+
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
28+
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
1529
]
1630

1731
# Category indices
1832
CATEGORY_OTHER = 0
1933
CATEGORY_PYTHON = 1
2034
CATEGORY_NATIVE = 2
21-
CATEGORY_IDLE = 3
35+
CATEGORY_GC = 3
36+
CATEGORY_GIL = 4
37+
CATEGORY_CPU = 5
38+
CATEGORY_CODE_TYPE = 6
2239

2340
# Subcategory indices
2441
DEFAULT_SUBCATEGORY = 0
@@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
5875
self.last_sample_time = 0
5976
self.interval = 1.0 # Will be calculated from actual sampling
6077

78+
# State tracking for interval markers (tid -> start_time)
79+
self.has_gil_start = {} # Thread has the GIL
80+
self.no_gil_start = {} # Thread doesn't have the GIL
81+
self.on_cpu_start = {} # Thread is running on CPU
82+
self.off_cpu_start = {} # Thread is off CPU
83+
self.python_code_start = {} # Thread running Python code (has GIL)
84+
self.native_code_start = {} # Thread running native code (on CPU without GIL)
85+
self.gil_wait_start = {} # Thread waiting for GIL
86+
87+
# GC event tracking: track GC start time per thread
88+
self.gc_start_per_thread = {} # tid -> start_time
89+
90+
# Track which threads have been initialized for state tracking
91+
self.initialized_threads = set()
92+
93+
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
94+
active_name, inactive_name, category, current_time):
95+
"""Track binary state transitions and emit markers.
96+
97+
Args:
98+
tid: Thread ID
99+
condition: Whether the active state is true
100+
active_dict: Dict tracking start time of active state
101+
inactive_dict: Dict tracking start time of inactive state
102+
active_name: Name for active state marker
103+
inactive_name: Name for inactive state marker
104+
category: Gecko category for the markers
105+
current_time: Current timestamp
106+
"""
107+
# On first observation of a thread, just record the current state
108+
# without creating a marker (we don't know what the previous state was)
109+
if tid not in self.initialized_threads:
110+
if condition:
111+
active_dict[tid] = current_time
112+
else:
113+
inactive_dict[tid] = current_time
114+
return
115+
116+
# For already-initialized threads, track transitions
117+
if condition:
118+
active_dict.setdefault(tid, current_time)
119+
if tid in inactive_dict:
120+
self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
121+
current_time, category)
122+
else:
123+
inactive_dict.setdefault(tid, current_time)
124+
if tid in active_dict:
125+
self._add_marker(tid, active_name, active_dict.pop(tid),
126+
current_time, category)
127+
61128
def collect(self, stack_frames):
62129
"""Collect a sample from stack frames."""
63130
current_time = (time.time() * 1000) - self.start_time
@@ -69,26 +136,93 @@ def collect(self, stack_frames):
69136
) / self.sample_count
70137
self.last_sample_time = current_time
71138

139+
# Process threads and track GC per thread
72140
for interpreter_info in stack_frames:
73141
for thread_info in interpreter_info.threads:
74-
if (
75-
self.skip_idle
76-
and thread_info.status != THREAD_STATE_RUNNING
77-
):
78-
continue
79-
80142
frames = thread_info.frame_info
81-
if not frames:
82-
continue
83-
84143
tid = thread_info.thread_id
144+
gc_collecting = thread_info.gc_collecting
85145

86146
# Initialize thread if needed
87147
if tid not in self.threads:
88148
self.threads[tid] = self._create_thread(tid)
89149

90150
thread_data = self.threads[tid]
91151

152+
# Decode status flags
153+
status_flags = thread_info.status
154+
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
155+
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
156+
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
157+
158+
# Track GIL possession (Has GIL / No GIL)
159+
self._track_state_transition(
160+
tid, has_gil, self.has_gil_start, self.no_gil_start,
161+
"Has GIL", "No GIL", CATEGORY_GIL, current_time
162+
)
163+
164+
# Track CPU state (On CPU / Off CPU)
165+
self._track_state_transition(
166+
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
167+
"On CPU", "Off CPU", CATEGORY_CPU, current_time
168+
)
169+
170+
# Track code type (Python Code / Native Code)
171+
# This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
172+
if has_gil:
173+
self._track_state_transition(
174+
tid, True, self.python_code_start, self.native_code_start,
175+
"Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
176+
)
177+
elif on_cpu:
178+
self._track_state_transition(
179+
tid, True, self.native_code_start, self.python_code_start,
180+
"Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
181+
)
182+
else:
183+
# Thread is idle (neither has GIL nor on CPU) - close any open code markers
184+
# This handles the third state that _track_state_transition doesn't cover
185+
if tid in self.initialized_threads:
186+
if tid in self.python_code_start:
187+
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
188+
current_time, CATEGORY_CODE_TYPE)
189+
if tid in self.native_code_start:
190+
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
191+
current_time, CATEGORY_CODE_TYPE)
192+
193+
# Track "Waiting for GIL" intervals (one-sided tracking)
194+
if gil_requested:
195+
self.gil_wait_start.setdefault(tid, current_time)
196+
elif tid in self.gil_wait_start:
197+
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
198+
current_time, CATEGORY_GIL)
199+
200+
# Track GC events - attribute to all threads that hold the GIL during GC
201+
# (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
202+
# If GIL switches during GC, multiple threads will get GC markers
203+
if gc_collecting and has_gil:
204+
# Start GC marker if not already started for this thread
205+
if tid not in self.gc_start_per_thread:
206+
self.gc_start_per_thread[tid] = current_time
207+
elif tid in self.gc_start_per_thread:
208+
# End GC marker if it was running for this thread
209+
# (either GC finished or thread lost GIL)
210+
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
211+
current_time, CATEGORY_GC)
212+
213+
# Mark thread as initialized after processing all state transitions
214+
self.initialized_threads.add(tid)
215+
216+
# Categorize: idle if neither has GIL nor on CPU
217+
is_idle = not has_gil and not on_cpu
218+
219+
# Skip idle threads if skip_idle is enabled
220+
if self.skip_idle and is_idle:
221+
continue
222+
223+
if not frames:
224+
continue
225+
92226
# Process the stack
93227
stack_index = self._process_stack(thread_data, frames)
94228

@@ -102,7 +236,6 @@ def collect(self, stack_frames):
102236

103237
def _create_thread(self, tid):
104238
"""Create a new thread structure with processed profile format."""
105-
import threading
106239

107240
# Determine if this is the main thread
108241
try:
@@ -181,7 +314,7 @@ def _create_thread(self, tid):
181314
"functionSize": [],
182315
"length": 0,
183316
},
184-
# Markers - processed format
317+
# Markers - processed format (arrays)
185318
"markers": {
186319
"data": [],
187320
"name": [],
@@ -215,6 +348,27 @@ def _intern_string(self, s):
215348
self.global_string_map[s] = idx
216349
return idx
217350

351+
def _add_marker(self, tid, name, start_time, end_time, category):
352+
"""Add an interval marker for a specific thread."""
353+
if tid not in self.threads:
354+
return
355+
356+
thread_data = self.threads[tid]
357+
duration = end_time - start_time
358+
359+
name_idx = self._intern_string(name)
360+
markers = thread_data["markers"]
361+
markers["name"].append(name_idx)
362+
markers["startTime"].append(start_time)
363+
markers["endTime"].append(end_time)
364+
markers["phase"].append(1) # 1 = interval marker
365+
markers["category"].append(category)
366+
markers["data"].append({
367+
"type": name.replace(" ", ""),
368+
"duration": duration,
369+
"tid": tid
370+
})
371+
218372
def _process_stack(self, thread_data, frames):
219373
"""Process a stack and return the stack index."""
220374
if not frames:
@@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
383537
frame_cache[frame_key] = frame_idx
384538
return frame_idx
385539

540+
def _finalize_markers(self):
541+
"""Close any open markers at the end of profiling."""
542+
end_time = self.last_sample_time
543+
544+
# Close all open markers for each thread using a generic approach
545+
marker_states = [
546+
(self.has_gil_start, "Has GIL", CATEGORY_GIL),
547+
(self.no_gil_start, "No GIL", CATEGORY_GIL),
548+
(self.on_cpu_start, "On CPU", CATEGORY_CPU),
549+
(self.off_cpu_start, "Off CPU", CATEGORY_CPU),
550+
(self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
551+
(self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
552+
(self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
553+
(self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
554+
]
555+
556+
for state_dict, marker_name, category in marker_states:
557+
for tid in list(state_dict.keys()):
558+
self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
559+
del state_dict[tid]
560+
386561
def export(self, filename):
387562
"""Export the profile to a Gecko JSON file."""
563+
388564
if self.sample_count > 0 and self.last_sample_time > 0:
389565
self.interval = self.last_sample_time / self.sample_count
390566

391-
profile = self._build_profile()
567+
# Spinner for progress indication
568+
spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'])
569+
stop_spinner = threading.Event()
570+
571+
def spin():
572+
message = 'Building Gecko profile...'
573+
while not stop_spinner.is_set():
574+
sys.stderr.write(f'\r{next(spinner)} {message}')
575+
sys.stderr.flush()
576+
time.sleep(0.1)
577+
# Clear the spinner line
578+
sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
579+
sys.stderr.flush()
580+
581+
spinner_thread = threading.Thread(target=spin, daemon=True)
582+
spinner_thread.start()
583+
584+
try:
585+
# Finalize any open markers before building profile
586+
self._finalize_markers()
587+
588+
profile = self._build_profile()
392589

393-
with open(filename, "w") as f:
394-
json.dump(profile, f, separators=(",", ":"))
590+
with open(filename, "w") as f:
591+
json.dump(profile, f, separators=(",", ":"))
592+
finally:
593+
stop_spinner.set()
594+
spinner_thread.join(timeout=1.0)
595+
# Small delay to ensure the clear happens
596+
time.sleep(0.01)
395597

396598
print(f"Gecko profile written to {filename}")
397599
print(
@@ -416,6 +618,7 @@ def _build_profile(self):
416618
frame_table["length"] = len(frame_table["func"])
417619
func_table["length"] = len(func_table["name"])
418620
resource_table["length"] = len(resource_table["name"])
621+
thread_data["markers"]["length"] = len(thread_data["markers"]["name"])
419622

420623
# Clean up internal caches
421624
del thread_data["_stackCache"]

0 commit comments

Comments
 (0)