1+ import itertools
12import json
23import os
34import platform
5+ import sys
6+ import threading
47import time
58
6- from .collector import Collector , THREAD_STATE_RUNNING
9+ from .collector import Collector
10+ try :
11+ from _remote_debugging import THREAD_STATUS_HAS_GIL , THREAD_STATUS_ON_CPU , THREAD_STATUS_UNKNOWN , THREAD_STATUS_GIL_REQUESTED
12+ except ImportError :
13+ # Fallback if module not available (shouldn't happen in normal use)
14+ THREAD_STATUS_HAS_GIL = (1 << 0 )
15+ THREAD_STATUS_ON_CPU = (1 << 1 )
16+ THREAD_STATUS_UNKNOWN = (1 << 2 )
17+ THREAD_STATUS_GIL_REQUESTED = (1 << 3 )
718
819
920# Categories matching Firefox Profiler expectations
1021GECKO_CATEGORIES = [
1122 {"name" : "Other" , "color" : "grey" , "subcategories" : ["Other" ]},
1223 {"name" : "Python" , "color" : "yellow" , "subcategories" : ["Other" ]},
1324 {"name" : "Native" , "color" : "blue" , "subcategories" : ["Other" ]},
14- {"name" : "Idle" , "color" : "transparent" , "subcategories" : ["Other" ]},
25+ {"name" : "GC" , "color" : "orange" , "subcategories" : ["Other" ]},
26+ {"name" : "GIL" , "color" : "green" , "subcategories" : ["Other" ]},
27+ {"name" : "CPU" , "color" : "purple" , "subcategories" : ["Other" ]},
28+ {"name" : "Code Type" , "color" : "red" , "subcategories" : ["Other" ]},
1529]
1630
1731# Category indices
1832CATEGORY_OTHER = 0
1933CATEGORY_PYTHON = 1
2034CATEGORY_NATIVE = 2
21- CATEGORY_IDLE = 3
35+ CATEGORY_GC = 3
36+ CATEGORY_GIL = 4
37+ CATEGORY_CPU = 5
38+ CATEGORY_CODE_TYPE = 6
2239
2340# Subcategory indices
2441DEFAULT_SUBCATEGORY = 0
@@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
5875 self .last_sample_time = 0
5976 self .interval = 1.0 # Will be calculated from actual sampling
6077
78+ # State tracking for interval markers (tid -> start_time)
79+ self .has_gil_start = {} # Thread has the GIL
80+ self .no_gil_start = {} # Thread doesn't have the GIL
81+ self .on_cpu_start = {} # Thread is running on CPU
82+ self .off_cpu_start = {} # Thread is off CPU
83+ self .python_code_start = {} # Thread running Python code (has GIL)
84+ self .native_code_start = {} # Thread running native code (on CPU without GIL)
85+ self .gil_wait_start = {} # Thread waiting for GIL
86+
87+ # GC event tracking: track GC start time per thread
88+ self .gc_start_per_thread = {} # tid -> start_time
89+
90+ # Track which threads have been initialized for state tracking
91+ self .initialized_threads = set ()
92+
93+ def _track_state_transition (self , tid , condition , active_dict , inactive_dict ,
94+ active_name , inactive_name , category , current_time ):
95+ """Track binary state transitions and emit markers.
96+
97+ Args:
98+ tid: Thread ID
99+ condition: Whether the active state is true
100+ active_dict: Dict tracking start time of active state
101+ inactive_dict: Dict tracking start time of inactive state
102+ active_name: Name for active state marker
103+ inactive_name: Name for inactive state marker
104+ category: Gecko category for the markers
105+ current_time: Current timestamp
106+ """
107+ # On first observation of a thread, just record the current state
108+ # without creating a marker (we don't know what the previous state was)
109+ if tid not in self .initialized_threads :
110+ if condition :
111+ active_dict [tid ] = current_time
112+ else :
113+ inactive_dict [tid ] = current_time
114+ return
115+
116+ # For already-initialized threads, track transitions
117+ if condition :
118+ active_dict .setdefault (tid , current_time )
119+ if tid in inactive_dict :
120+ self ._add_marker (tid , inactive_name , inactive_dict .pop (tid ),
121+ current_time , category )
122+ else :
123+ inactive_dict .setdefault (tid , current_time )
124+ if tid in active_dict :
125+ self ._add_marker (tid , active_name , active_dict .pop (tid ),
126+ current_time , category )
127+
61128 def collect (self , stack_frames ):
62129 """Collect a sample from stack frames."""
63130 current_time = (time .time () * 1000 ) - self .start_time
@@ -69,26 +136,93 @@ def collect(self, stack_frames):
69136 ) / self .sample_count
70137 self .last_sample_time = current_time
71138
139+ # Process threads and track GC per thread
72140 for interpreter_info in stack_frames :
73141 for thread_info in interpreter_info .threads :
74- if (
75- self .skip_idle
76- and thread_info .status != THREAD_STATE_RUNNING
77- ):
78- continue
79-
80142 frames = thread_info .frame_info
81- if not frames :
82- continue
83-
84143 tid = thread_info .thread_id
144+ gc_collecting = thread_info .gc_collecting
85145
86146 # Initialize thread if needed
87147 if tid not in self .threads :
88148 self .threads [tid ] = self ._create_thread (tid )
89149
90150 thread_data = self .threads [tid ]
91151
152+ # Decode status flags
153+ status_flags = thread_info .status
154+ has_gil = bool (status_flags & THREAD_STATUS_HAS_GIL )
155+ on_cpu = bool (status_flags & THREAD_STATUS_ON_CPU )
156+ gil_requested = bool (status_flags & THREAD_STATUS_GIL_REQUESTED )
157+
158+ # Track GIL possession (Has GIL / No GIL)
159+ self ._track_state_transition (
160+ tid , has_gil , self .has_gil_start , self .no_gil_start ,
161+ "Has GIL" , "No GIL" , CATEGORY_GIL , current_time
162+ )
163+
164+ # Track CPU state (On CPU / Off CPU)
165+ self ._track_state_transition (
166+ tid , on_cpu , self .on_cpu_start , self .off_cpu_start ,
167+ "On CPU" , "Off CPU" , CATEGORY_CPU , current_time
168+ )
169+
170+ # Track code type (Python Code / Native Code)
171+ # This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
172+ if has_gil :
173+ self ._track_state_transition (
174+ tid , True , self .python_code_start , self .native_code_start ,
175+ "Python Code" , "Native Code" , CATEGORY_CODE_TYPE , current_time
176+ )
177+ elif on_cpu :
178+ self ._track_state_transition (
179+ tid , True , self .native_code_start , self .python_code_start ,
180+ "Native Code" , "Python Code" , CATEGORY_CODE_TYPE , current_time
181+ )
182+ else :
183+ # Thread is idle (neither has GIL nor on CPU) - close any open code markers
184+ # This handles the third state that _track_state_transition doesn't cover
185+ if tid in self .initialized_threads :
186+ if tid in self .python_code_start :
187+ self ._add_marker (tid , "Python Code" , self .python_code_start .pop (tid ),
188+ current_time , CATEGORY_CODE_TYPE )
189+ if tid in self .native_code_start :
190+ self ._add_marker (tid , "Native Code" , self .native_code_start .pop (tid ),
191+ current_time , CATEGORY_CODE_TYPE )
192+
193+ # Track "Waiting for GIL" intervals (one-sided tracking)
194+ if gil_requested :
195+ self .gil_wait_start .setdefault (tid , current_time )
196+ elif tid in self .gil_wait_start :
197+ self ._add_marker (tid , "Waiting for GIL" , self .gil_wait_start .pop (tid ),
198+ current_time , CATEGORY_GIL )
199+
200+ # Track GC events - attribute to all threads that hold the GIL during GC
201+ # (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
202+ # If GIL switches during GC, multiple threads will get GC markers
203+ if gc_collecting and has_gil :
204+ # Start GC marker if not already started for this thread
205+ if tid not in self .gc_start_per_thread :
206+ self .gc_start_per_thread [tid ] = current_time
207+ elif tid in self .gc_start_per_thread :
208+ # End GC marker if it was running for this thread
209+ # (either GC finished or thread lost GIL)
210+ self ._add_marker (tid , "GC Collecting" , self .gc_start_per_thread .pop (tid ),
211+ current_time , CATEGORY_GC )
212+
213+ # Mark thread as initialized after processing all state transitions
214+ self .initialized_threads .add (tid )
215+
216+ # Categorize: idle if neither has GIL nor on CPU
217+ is_idle = not has_gil and not on_cpu
218+
219+ # Skip idle threads if skip_idle is enabled
220+ if self .skip_idle and is_idle :
221+ continue
222+
223+ if not frames :
224+ continue
225+
92226 # Process the stack
93227 stack_index = self ._process_stack (thread_data , frames )
94228
@@ -102,7 +236,6 @@ def collect(self, stack_frames):
102236
103237 def _create_thread (self , tid ):
104238 """Create a new thread structure with processed profile format."""
105- import threading
106239
107240 # Determine if this is the main thread
108241 try :
@@ -181,7 +314,7 @@ def _create_thread(self, tid):
181314 "functionSize" : [],
182315 "length" : 0 ,
183316 },
184- # Markers - processed format
317+ # Markers - processed format (arrays)
185318 "markers" : {
186319 "data" : [],
187320 "name" : [],
@@ -215,6 +348,27 @@ def _intern_string(self, s):
215348 self .global_string_map [s ] = idx
216349 return idx
217350
351+ def _add_marker (self , tid , name , start_time , end_time , category ):
352+ """Add an interval marker for a specific thread."""
353+ if tid not in self .threads :
354+ return
355+
356+ thread_data = self .threads [tid ]
357+ duration = end_time - start_time
358+
359+ name_idx = self ._intern_string (name )
360+ markers = thread_data ["markers" ]
361+ markers ["name" ].append (name_idx )
362+ markers ["startTime" ].append (start_time )
363+ markers ["endTime" ].append (end_time )
364+ markers ["phase" ].append (1 ) # 1 = interval marker
365+ markers ["category" ].append (category )
366+ markers ["data" ].append ({
367+ "type" : name .replace (" " , "" ),
368+ "duration" : duration ,
369+ "tid" : tid
370+ })
371+
218372 def _process_stack (self , thread_data , frames ):
219373 """Process a stack and return the stack index."""
220374 if not frames :
@@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
383537 frame_cache [frame_key ] = frame_idx
384538 return frame_idx
385539
540+ def _finalize_markers (self ):
541+ """Close any open markers at the end of profiling."""
542+ end_time = self .last_sample_time
543+
544+ # Close all open markers for each thread using a generic approach
545+ marker_states = [
546+ (self .has_gil_start , "Has GIL" , CATEGORY_GIL ),
547+ (self .no_gil_start , "No GIL" , CATEGORY_GIL ),
548+ (self .on_cpu_start , "On CPU" , CATEGORY_CPU ),
549+ (self .off_cpu_start , "Off CPU" , CATEGORY_CPU ),
550+ (self .python_code_start , "Python Code" , CATEGORY_CODE_TYPE ),
551+ (self .native_code_start , "Native Code" , CATEGORY_CODE_TYPE ),
552+ (self .gil_wait_start , "Waiting for GIL" , CATEGORY_GIL ),
553+ (self .gc_start_per_thread , "GC Collecting" , CATEGORY_GC ),
554+ ]
555+
556+ for state_dict , marker_name , category in marker_states :
557+ for tid in list (state_dict .keys ()):
558+ self ._add_marker (tid , marker_name , state_dict [tid ], end_time , category )
559+ del state_dict [tid ]
560+
386561 def export (self , filename ):
387562 """Export the profile to a Gecko JSON file."""
563+
388564 if self .sample_count > 0 and self .last_sample_time > 0 :
389565 self .interval = self .last_sample_time / self .sample_count
390566
391- profile = self ._build_profile ()
567+ # Spinner for progress indication
568+ spinner = itertools .cycle (['⠋' , '⠙' , '⠹' , '⠸' , '⠼' , '⠴' , '⠦' , '⠧' , '⠇' , '⠏' ])
569+ stop_spinner = threading .Event ()
570+
571+ def spin ():
572+ message = 'Building Gecko profile...'
573+ while not stop_spinner .is_set ():
574+ sys .stderr .write (f'\r { next (spinner )} { message } ' )
575+ sys .stderr .flush ()
576+ time .sleep (0.1 )
577+ # Clear the spinner line
578+ sys .stderr .write ('\r ' + ' ' * (len (message ) + 3 ) + '\r ' )
579+ sys .stderr .flush ()
580+
581+ spinner_thread = threading .Thread (target = spin , daemon = True )
582+ spinner_thread .start ()
583+
584+ try :
585+ # Finalize any open markers before building profile
586+ self ._finalize_markers ()
587+
588+ profile = self ._build_profile ()
392589
393- with open (filename , "w" ) as f :
394- json .dump (profile , f , separators = ("," , ":" ))
590+ with open (filename , "w" ) as f :
591+ json .dump (profile , f , separators = ("," , ":" ))
592+ finally :
593+ stop_spinner .set ()
594+ spinner_thread .join (timeout = 1.0 )
595+ # Small delay to ensure the clear happens
596+ time .sleep (0.01 )
395597
396598 print (f"Gecko profile written to { filename } " )
397599 print (
@@ -416,6 +618,7 @@ def _build_profile(self):
416618 frame_table ["length" ] = len (frame_table ["func" ])
417619 func_table ["length" ] = len (func_table ["name" ])
418620 resource_table ["length" ] = len (resource_table ["name" ])
621+ thread_data ["markers" ]["length" ] = len (thread_data ["markers" ]["name" ])
419622
420623 # Clean up internal caches
421624 del thread_data ["_stackCache" ]
0 commit comments