Skip to content

Commit 19ff11b

Browse files
committed
Emit opcode interval markers in Gecko collector
Tracks opcode state transitions per thread and emits interval markers when the executing opcode changes. Markers include opcode name, line, column, and duration. Adds Opcodes category to marker schema.
1 parent aedc000 commit 19ff11b

File tree

1 file changed

+111
-9
lines changed

1 file changed

+111
-9
lines changed

Lib/profiling/sampling/gecko_collector.py

Lines changed: 111 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import time
88

99
from .collector import Collector
10+
from .opcode_utils import get_opcode_info, format_opcode
1011
try:
1112
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
1213
except ImportError:
@@ -26,6 +27,7 @@
2627
{"name": "GIL", "color": "green", "subcategories": ["Other"]},
2728
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
2829
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
30+
{"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]},
2931
]
3032

3133
# Category indices
@@ -36,6 +38,7 @@
3638
CATEGORY_GIL = 4
3739
CATEGORY_CPU = 5
3840
CATEGORY_CODE_TYPE = 6
41+
CATEGORY_OPCODES = 7
3942

4043
# Subcategory indices
4144
DEFAULT_SUBCATEGORY = 0
@@ -56,9 +59,10 @@
5659

5760

5861
class GeckoCollector(Collector):
59-
def __init__(self, sample_interval_usec, *, skip_idle=False):
62+
def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
6063
self.sample_interval_usec = sample_interval_usec
6164
self.skip_idle = skip_idle
65+
self.opcodes_enabled = opcodes
6266
self.start_time = time.time() * 1000 # milliseconds since epoch
6367

6468
# Global string table (shared across all threads)
@@ -91,6 +95,9 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
9195
# Track which threads have been initialized for state tracking
9296
self.initialized_threads = set()
9397

98+
# Opcode state tracking per thread: tid -> (opcode, lineno, col_offset, funcname, filename, start_time)
99+
self.opcode_state = {}
100+
94101
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
95102
active_name, inactive_name, category, current_time):
96103
"""Track binary state transitions and emit markers.
@@ -232,6 +239,30 @@ def collect(self, stack_frames):
232239
samples["time"].append(current_time)
233240
samples["eventDelay"].append(None)
234241

242+
# Track opcode state changes for interval markers (leaf frame only)
243+
if self.opcodes_enabled:
244+
leaf_frame = frames[0]
245+
filename, location, funcname, opcode = leaf_frame
246+
if isinstance(location, tuple):
247+
lineno, _, col_offset, _ = location
248+
else:
249+
lineno = location
250+
col_offset = -1
251+
252+
current_state = (opcode, lineno, col_offset, funcname, filename)
253+
254+
if tid not in self.opcode_state:
255+
# First observation - start tracking
256+
self.opcode_state[tid] = (*current_state, current_time)
257+
elif self.opcode_state[tid][:5] != current_state:
258+
# State changed - emit marker for previous state
259+
prev_opcode, prev_lineno, prev_col, prev_funcname, prev_filename, prev_start = self.opcode_state[tid]
260+
self._add_opcode_interval_marker(
261+
tid, prev_opcode, prev_lineno, prev_col, prev_funcname, prev_start, current_time
262+
)
263+
# Start tracking new state
264+
self.opcode_state[tid] = (*current_state, current_time)
265+
235266
self.sample_count += 1
236267

237268
def _create_thread(self, tid):
@@ -369,6 +400,36 @@ def _add_marker(self, tid, name, start_time, end_time, category):
369400
"tid": tid
370401
})
371402

403+
def _add_opcode_interval_marker(self, tid, opcode, lineno, col_offset, funcname, start_time, end_time):
    """Add an interval marker for the span a thread spent on one opcode.

    Nothing is recorded when ``tid`` is not a key of ``self.threads`` or
    when ``opcode`` is ``None``.

    Args:
        tid: Thread id whose marker table receives the entry.
        opcode: Opcode value handed to ``get_opcode_info`` and
            ``format_opcode``.
        lineno: Source line stored in the marker payload.
        col_offset: Source column; ``None`` or a negative value is stored
            as ``None`` in the payload.
        funcname: Function name stored in the marker payload.
        start_time: Start of the interval (stored as ``startTime``).
        end_time: End of the interval (stored as ``endTime``).
    """
    if tid not in self.threads or opcode is None:
        return

    thread_data = self.threads[tid]
    opcode_info = get_opcode_info(opcode)
    # Formatted name is used both as the marker name and in the payload.
    formatted_opname = format_opcode(opcode)

    name_idx = self._intern_string(formatted_opname)

    # A column may be reported as None as well as negative when it is
    # unknown; "None >= 0" raises TypeError, so test for None first.
    if col_offset is not None and col_offset >= 0:
        column = col_offset
    else:
        column = None

    markers = thread_data["markers"]
    markers["name"].append(name_idx)
    markers["startTime"].append(start_time)
    markers["endTime"].append(end_time)
    markers["phase"].append(1)  # 1 = interval marker
    markers["category"].append(CATEGORY_OPCODES)
    markers["data"].append({
        "type": "Opcode",
        "opcode": opcode,
        "opname": formatted_opname,
        "base_opname": opcode_info["base_opname"],
        "is_specialized": opcode_info["is_specialized"],
        "line": lineno,
        "column": column,
        "function": funcname,
        "duration": end_time - start_time,
    })
432+
372433
def _process_stack(self, thread_data, frames):
373434
"""Process a stack and return the stack index."""
374435
if not frames:
@@ -386,17 +447,25 @@ def _process_stack(self, thread_data, frames):
386447
prefix_stack_idx = None
387448

388449
for frame_tuple in reversed(frames):
389-
# frame_tuple is (filename, lineno, funcname)
390-
filename, lineno, funcname = frame_tuple
450+
# frame_tuple is (filename, location, funcname, opcode)
451+
# location is (lineno, end_lineno, col_offset, end_col_offset) or just lineno
452+
filename, location, funcname, opcode = frame_tuple
453+
if isinstance(location, tuple):
454+
lineno, end_lineno, col_offset, end_col_offset = location
455+
else:
456+
# Legacy format: location is just lineno
457+
lineno = location
458+
col_offset = -1
459+
end_col_offset = -1
391460

392461
# Get or create function
393462
func_idx = self._get_or_create_func(
394463
thread_data, filename, funcname, lineno
395464
)
396465

397-
# Get or create frame
466+
# Get or create frame (include column for precise source location)
398467
frame_idx = self._get_or_create_frame(
399-
thread_data, func_idx, lineno
468+
thread_data, func_idx, lineno, col_offset
400469
)
401470

402471
# Check stack cache
@@ -494,10 +563,11 @@ def _get_or_create_resource(self, thread_data, filename):
494563
resource_cache[filename] = resource_idx
495564
return resource_idx
496565

497-
def _get_or_create_frame(self, thread_data, func_idx, lineno):
566+
def _get_or_create_frame(self, thread_data, func_idx, lineno, col_offset=-1):
498567
"""Get or create a frame entry."""
499568
frame_cache = thread_data["_frameCache"]
500-
frame_key = (func_idx, lineno)
569+
# Include column in cache key for precise frame identification
570+
frame_key = (func_idx, lineno, col_offset if col_offset >= 0 else None)
501571

502572
if frame_key in frame_cache:
503573
return frame_cache[frame_key]
@@ -531,7 +601,8 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
531601
frame_inner_window_ids.append(None)
532602
frame_implementations.append(None)
533603
frame_lines.append(lineno if lineno else None)
534-
frame_columns.append(None)
604+
# Store column offset if available (>= 0), otherwise None
605+
frame_columns.append(col_offset if col_offset >= 0 else None)
535606
frame_optimizations.append(None)
536607

537608
frame_cache[frame_key] = frame_idx
@@ -558,6 +629,12 @@ def _finalize_markers(self):
558629
self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
559630
del state_dict[tid]
560631

632+
# Close any open opcode markers
633+
for tid, state in list(self.opcode_state.items()):
634+
opcode, lineno, col_offset, funcname, filename, start_time = state
635+
self._add_opcode_interval_marker(tid, opcode, lineno, col_offset, funcname, start_time, end_time)
636+
self.opcode_state.clear()
637+
561638
def export(self, filename):
562639
"""Export the profile to a Gecko JSON file."""
563640

@@ -600,6 +677,31 @@ def spin():
600677
f"Open in Firefox Profiler: https://profiler.firefox.com/"
601678
)
602679

680+
def _build_marker_schema(self):
    """Return the marker schema list embedded in the exported profile.

    The list is empty unless ``self.opcodes_enabled`` is truthy, in which
    case it holds a single schema entry describing "Opcode" markers.
    """
    if not self.opcodes_enabled:
        return []

    # One descriptor per key of the "Opcode" marker payload.
    opcode_fields = [
        {"key": "opname", "label": "Opcode", "format": "string", "searchable": True},
        {"key": "base_opname", "label": "Base Opcode", "format": "string"},
        {"key": "is_specialized", "label": "Specialized", "format": "string"},
        {"key": "line", "label": "Line", "format": "integer"},
        {"key": "column", "label": "Column", "format": "integer"},
        {"key": "function", "label": "Function", "format": "string"},
        {"key": "duration", "label": "Duration", "format": "duration"},
    ]

    opcode_schema = {
        "name": "Opcode",
        "display": ["marker-table", "marker-chart"],
        "tooltipLabel": "{marker.data.opname}",
        "tableLabel": "{marker.data.opname} at line {marker.data.line}",
        "chartLabel": "{marker.data.opname}",
        "fields": opcode_fields,
    }
    return [opcode_schema]
704+
603705
def _build_profile(self):
604706
"""Build the complete profile structure in processed format."""
605707
# Convert thread data to final format
@@ -649,7 +751,7 @@ def _build_profile(self):
649751
"CPUName": "",
650752
"product": "Python",
651753
"symbolicated": True,
652-
"markerSchema": [],
754+
"markerSchema": self._build_marker_schema(),
653755
"importedFrom": "Tachyon Sampling Profiler",
654756
"extensions": {
655757
"id": [],

0 commit comments

Comments
 (0)