Skip to content

Commit 0ae7d2c

Browse files
gh-135953: Add Gecko reporter to sampling profiler
1 parent bc7b511 commit 0ae7d2c

File tree

3 files changed

+472
-3
lines changed

3 files changed

+472
-3
lines changed
Lines changed: 391 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,391 @@
1+
"""Gecko Profile format.
2+
3+
Provides dataclasses and a builder for generating the Gecko profile format.
4+
"""
5+
6+
import platform
7+
from dataclasses import dataclass, field
8+
from typing import List, Dict, Optional, Any
9+
10+
from Lib.profiling.sampling.string_table import StringTable
11+
12+
# Category indices stored in a frame's "category" column; they line up with
# the positions of the entries in GECKO_CATEGORIES below.
PYTHON_CATEGORY = 0
OTHER_CATEGORY = 1
# NOTE(review): not referenced by the code visible in this module; presumably
# the value for a frame's "implementation" column -- confirm intended use.
NATIVE_IMPLEMENTATION = None
# Default for the "version" key of the profile meta section.
GECKO_VERSION = 28

# Category table published in the profile meta section.
GECKO_CATEGORIES = [
    {"name": "Python", "color": "blue", "subcategories": ["Other"]},
    {"name": "Other", "color": "grey", "subcategories": ["Other"]},
]
21+
22+
@dataclass
class GeckoFrameTableSchema:
    """Column layout of a frame-table row.

    Each field holds the position of the named column inside a row. The
    defaults follow the element order produced by ``GeckoFrame.to_array``.
    """

    location: int = 0
    relevantForJS: int = 1
    innerWindowID: int = 2
    implementation: int = 3
    optimizations: int = 4
    line: int = 5
    column: int = 6
    category: int = 7
    subcategory: int = 8

    def to_dict(self) -> Dict[str, int]:
        """Return the schema as a ``{column name: column index}`` mapping."""
        return {
            "location": self.location,
            "relevantForJS": self.relevantForJS,
            "innerWindowID": self.innerWindowID,
            "implementation": self.implementation,
            "optimizations": self.optimizations,
            "line": self.line,
            "column": self.column,
            "category": self.category,
            "subcategory": self.subcategory,
        }
46+
47+
@dataclass
class GeckoStackTableSchema:
    """Column layout of a stack-table row.

    The defaults follow the element order produced by
    ``GeckoStack.to_array`` (prefix first, then frame).
    """

    prefix: int = 0
    frame: int = 1

    def to_dict(self) -> Dict[str, int]:
        """Return the schema as a ``{column name: column index}`` mapping."""
        return {
            "prefix": self.prefix,
            "frame": self.frame,
        }
57+
58+
@dataclass
class GeckoSampleSchema:
    """Column layout of a samples row.

    The defaults follow the element order produced by
    ``GeckoSample.to_array`` (stack, time, event delay).
    """

    stack: int = 0
    time: int = 1
    eventDelay: int = 2

    def to_dict(self) -> Dict[str, int]:
        """Return the schema as a ``{column name: column index}`` mapping."""
        return {
            "stack": self.stack,
            "time": self.time,
            "eventDelay": self.eventDelay,
        }
70+
71+
@dataclass
class GeckoMarkersSchema:
    """Column layout of a markers row.

    Each field holds the position of the named column inside a row.
    """

    name: int = 0
    startTime: int = 1
    endTime: int = 2
    phase: int = 3
    category: int = 4
    data: int = 5

    def to_dict(self) -> Dict[str, int]:
        """Return the schema as a ``{column name: column index}`` mapping."""
        return {
            "name": self.name,
            "startTime": self.startTime,
            "endTime": self.endTime,
            "phase": self.phase,
            "category": self.category,
            "data": self.data,
        }
89+
90+
@dataclass
class GeckoFrame:
    """One frame-table entry.

    Only ``location_id`` is required; every other column has a default.
    """

    location_id: int
    relevant_for_js: bool = False
    inner_window_id: int = 0
    implementation: Optional[str] = None
    optimizations: Optional[str] = None
    line: Optional[int] = None
    column: Optional[int] = None
    category: int = 0
    subcategory: int = 0

    def to_array(self) -> List[Any]:
        """Return the frame as a row in frame-table column order.

        The element order matches the default indices declared by
        ``GeckoFrameTableSchema``.
        """
        return [
            self.location_id,
            self.relevant_for_js,
            self.inner_window_id,
            self.implementation,
            self.optimizations,
            self.line,
            self.column,
            self.category,
            self.subcategory,
        ]
114+
115+
@dataclass
class GeckoStack:
    """One stack-table entry: a frame plus the stack entry it extends.

    ``prefix_id`` is ``None`` when the entry has no prefix.
    """

    prefix_id: Optional[int]
    frame_id: int

    def to_array(self) -> List[Any]:
        """Return the entry as a ``[prefix_id, frame_id]`` row."""
        return [self.prefix_id, self.frame_id]
122+
123+
@dataclass
class GeckoSample:
    """One sample: a stack-table index (or ``None``) and when it was taken."""

    stack_id: Optional[int]
    time_ms: float
    event_delay: float = 0.0

    def to_array(self) -> List[Any]:
        """Return the sample as a ``[stack_id, time_ms, event_delay]`` row."""
        return [self.stack_id, self.time_ms, self.event_delay]
131+
132+
@dataclass
class GeckoFrameTable:
    """Frame table: a column schema plus the frame rows."""

    schema: GeckoFrameTableSchema = field(
        default_factory=GeckoFrameTableSchema
    )
    # Rows in the shape produced by GeckoFrame.to_array().
    data: List[List[Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"schema": ..., "data": ...}``.

        ``data`` is returned by reference, not copied.
        """
        return {
            "schema": self.schema.to_dict(),
            "data": self.data,
        }
142+
143+
@dataclass
class GeckoStackTable:
    """Stack table: a column schema plus the stack rows."""

    schema: GeckoStackTableSchema = field(
        default_factory=GeckoStackTableSchema
    )
    # Rows in the shape produced by GeckoStack.to_array().
    data: List[List[Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"schema": ..., "data": ...}``.

        ``data`` is returned by reference, not copied.
        """
        return {
            "schema": self.schema.to_dict(),
            "data": self.data,
        }
153+
154+
@dataclass
class GeckoSamples:
    """Samples table: a column schema plus the sample rows."""

    schema: GeckoSampleSchema = field(default_factory=GeckoSampleSchema)
    # Rows in the shape produced by GeckoSample.to_array().
    data: List[List[Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"schema": ..., "data": ...}``.

        ``data`` is returned by reference, not copied.
        """
        return {
            "schema": self.schema.to_dict(),
            "data": self.data,
        }
164+
165+
@dataclass
class GeckoMarkers:
    """Markers table: a column schema plus the marker rows."""

    schema: GeckoMarkersSchema = field(default_factory=GeckoMarkersSchema)
    data: List[List[Any]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return ``{"schema": ..., "data": ...}``.

        ``data`` is returned by reference, not copied.
        """
        return {
            "schema": self.schema.to_dict(),
            "data": self.data,
        }
175+
176+
@dataclass
class GeckoThread:
    """One entry of the profile's ``threads`` list.

    Field names are camelCase because they are emitted verbatim as the keys
    of the dictionary returned by ``to_dict``.
    """

    name: str
    isMainThread: bool = False
    processType: str = "default"
    processName: str = "Python"
    processStartupTime: float = 0.0
    processShutdownTime: Optional[float] = None
    registerTime: int = 0
    unregisterTime: Optional[int] = None
    pausedRanges: List[Any] = field(default_factory=list)
    pid: int = 0
    tid: int = 0
    stringTable: List[str] = field(default_factory=list)
    frameTable: GeckoFrameTable = field(default_factory=GeckoFrameTable)
    stackTable: GeckoStackTable = field(default_factory=GeckoStackTable)
    samples: GeckoSamples = field(default_factory=GeckoSamples)
    markers: GeckoMarkers = field(default_factory=GeckoMarkers)

    def to_dict(self) -> Dict[str, Any]:
        """Return the thread as a plain dictionary.

        The four table fields are expanded through their own ``to_dict``;
        every other field is passed through unchanged (lists by reference).
        """
        return {
            "name": self.name,
            "isMainThread": self.isMainThread,
            "processType": self.processType,
            "processName": self.processName,
            "processStartupTime": self.processStartupTime,
            "processShutdownTime": self.processShutdownTime,
            "registerTime": self.registerTime,
            "unregisterTime": self.unregisterTime,
            "pausedRanges": self.pausedRanges,
            "pid": self.pid,
            "tid": self.tid,
            "stringTable": self.stringTable,
            "frameTable": self.frameTable.to_dict(),
            "stackTable": self.stackTable.to_dict(),
            "samples": self.samples.to_dict(),
            "markers": self.markers.to_dict(),
        }
214+
215+
@dataclass
class GeckoMeta:
    """Profile-level ``meta`` section.

    Field names are camelCase because they are emitted verbatim as the keys
    of the dictionary returned by ``to_dict``.
    """

    version: int = GECKO_VERSION
    startTime: float = 0.0
    shutdownTime: Optional[float] = None
    # Every instance gets its own copy of the category table (including the
    # nested lists), so editing one profile's categories cannot alter the
    # module-level GECKO_CATEGORIES shared by all other profiles.
    categories: List[Dict[str, Any]] = field(
        default_factory=lambda: [
            {
                key: list(value) if isinstance(value, list) else value
                for key, value in category.items()
            }
            for category in GECKO_CATEGORIES
        ]
    )
    markerSchema: List[Any] = field(default_factory=list)
    interval: int = 1
    stackwalk: int = 1
    debug: int = 0
    gcpoison: int = 0
    processType: int = 0
    presymbolicated: bool = True
    # The right-hand side is evaluated before the class attribute named
    # ``platform`` exists, so it still refers to the imported module. The
    # lambdas below run outside class scope and also see the module.
    platform: str = field(default_factory=platform.system)
    oscpu: str = field(
        default_factory=lambda: (
            f"{platform.machine()} {platform.system()} {platform.release()}"
        )
    )
    misc: str = field(
        default_factory=lambda: f"Python {platform.python_version()}"
    )

    def to_dict(self) -> Dict[str, Any]:
        """Return the meta section as a plain dictionary.

        Values are passed through unchanged (lists by reference).
        """
        return {
            "version": self.version,
            "startTime": self.startTime,
            "shutdownTime": self.shutdownTime,
            "categories": self.categories,
            "markerSchema": self.markerSchema,
            "interval": self.interval,
            "stackwalk": self.stackwalk,
            "debug": self.debug,
            "gcpoison": self.gcpoison,
            "processType": self.processType,
            "presymbolicated": self.presymbolicated,
            "platform": self.platform,
            "oscpu": self.oscpu,
            "misc": self.misc,
        }
249+
250+
@dataclass
class GeckoProfile:
    """Top-level profile document.

    ``meta`` and ``threads`` hold already-serialised dictionaries rather
    than dataclass instances; ``to_dict`` passes every field through
    unchanged.
    """

    meta: Dict[str, Any]
    libs: List[Any] = field(default_factory=list)
    pages: List[Any] = field(default_factory=list)
    pausedRanges: List[Any] = field(default_factory=list)
    threads: List[Dict[str, Any]] = field(default_factory=list)
    processes: List[Any] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the profile as a plain dictionary (values by reference)."""
        return {
            "meta": self.meta,
            "libs": self.libs,
            "pages": self.pages,
            "pausedRanges": self.pausedRanges,
            "threads": self.threads,
            "processes": self.processes,
        }
268+
269+
class GeckoBuilder:
    """Accumulate stack samples per thread and emit a Gecko profile dict.

    Frames and stacks are de-duplicated per thread: each distinct frame
    gets one frame-table row and each distinct ``(prefix, frame)`` pair gets
    one stack-table row.
    """

    def __init__(
        self,
        string_table: StringTable,
        start_time: Optional[float] = None,
    ):
        """Create an empty builder.

        Args:
            string_table: Table used to intern function names. This class
                calls its ``intern(name)`` and ``get_strings()`` methods.
            start_time: Profile start time. It is multiplied by 1000 on
                output (presumably seconds to milliseconds -- confirm with
                the caller). ``None`` is treated as 0.
        """
        self.string_table = string_table
        self.start_time = start_time
        # thread_id -> per-thread tables, created lazily by _thread_state().
        self.threads_data = {}

    def add_sample(self, frames_list, timestamp, thread_id=0):
        """Record one sample for *thread_id*.

        Args:
            frames_list: Sequence of hashable ``(filename, lineno, funcname)``
                frames. It is walked in reverse to build the prefix chain,
                so its last element becomes the root of the stack. An empty
                sequence records a sample whose stack is ``None``.
            timestamp: Sample time; stored multiplied by 1000.
            thread_id: Key identifying the sampled thread.
        """
        thread_data = self._thread_state(thread_id)
        frame_ids = [
            self._intern_frame(thread_data, frame) for frame in frames_list
        ]

        stack_id = None
        for frame_id in reversed(frame_ids):
            stack_id = self._intern_stack(thread_data, stack_id, frame_id)

        thread_data["samples"].append(
            GeckoSample(
                stack_id=stack_id,
                time_ms=timestamp * 1000,
                event_delay=0.0,
            )
        )

    def _thread_state(self, thread_id):
        """Return the table dict for *thread_id*, creating it on first use."""
        thread_data = self.threads_data.get(thread_id)
        if thread_data is None:
            thread_data = self.threads_data[thread_id] = {
                "frames": [],
                "frame_map": {},
                "stacks": [],
                "stack_map": {},
                "samples": [],
            }
        return thread_data

    def _intern_frame(self, thread_data, frame):
        """Return the frame-table index of *frame*, adding a row if new."""
        frame_map = thread_data["frame_map"]
        frame_index = frame_map.get(frame)
        if frame_index is not None:
            return frame_index

        # Unpack and build the row before touching the tables, so that a
        # malformed frame cannot leave a map entry pointing at a missing row.
        _filename, lineno, funcname = frame
        gecko_frame = GeckoFrame(
            location_id=self.string_table.intern(funcname),
            relevant_for_js=False,
            inner_window_id=0,
            implementation=None,
            optimizations=None,
            line=lineno,
            column=None,
            category=self._get_frame_category(frame),
            subcategory=0,
        )

        frame_index = len(thread_data["frames"])
        thread_data["frames"].append(gecko_frame)
        frame_map[frame] = frame_index
        return frame_index

    def _intern_stack(self, thread_data, prefix_id, frame_id):
        """Return the stack-table index of ``(prefix_id, frame_id)``.

        A new row is appended the first time a given pair is seen.
        """
        stack_map = thread_data["stack_map"]
        stack_key = (prefix_id, frame_id)
        stack_index = stack_map.get(stack_key)
        if stack_index is None:
            stack_index = len(thread_data["stacks"])
            thread_data["stacks"].append(
                GeckoStack(prefix_id=prefix_id, frame_id=frame_id)
            )
            stack_map[stack_key] = stack_index
        return stack_index

    def _get_frame_category(self, frame):
        """
        Determine frame category based on frame information.
        Frame is a tuple: (filename, lineno, funcname)
        Can be extended to use any part of the frame for categorization.

        Returns PYTHON_CATEGORY when the filename ends with '.py' and
        OTHER_CATEGORY otherwise (including an empty or missing filename).

        TODO: Change this once frames have a type/category field
        """
        filename, _, _ = frame
        is_python = bool(filename) and filename.endswith('.py')
        return PYTHON_CATEGORY if is_python else OTHER_CATEGORY

    def build_profile(self):
        """Return everything recorded so far as a Gecko profile dictionary.

        One thread entry is emitted per thread id passed to ``add_sample``,
        in the order the ids were first seen.
        """
        # The same converted start time is used for every thread and for
        # the meta section; a missing start time counts as 0.
        start_ms = (self.start_time or 0) * 1000

        threads = []
        for thread_id, thread_data in self.threads_data.items():
            frame_data = [f.to_array() for f in thread_data["frames"]]
            stack_data = [s.to_array() for s in thread_data["stacks"]]
            sample_data = [s.to_array() for s in thread_data["samples"]]

            gecko_thread = GeckoThread(
                name=f"Thread {thread_id}",
                isMainThread=(thread_id == 0),
                processType="default",
                processName="Python",
                processStartupTime=start_ms,
                processShutdownTime=None,
                registerTime=0,
                unregisterTime=None,
                pausedRanges=[],
                # NOTE(review): the thread id is reused as the pid --
                # confirm this is intended.
                pid=thread_id,
                tid=thread_id,
                stringTable=self.string_table.get_strings(),
                frameTable=GeckoFrameTable(data=frame_data),
                stackTable=GeckoStackTable(data=stack_data),
                samples=GeckoSamples(data=sample_data),
                markers=GeckoMarkers(data=[]),
            )
            threads.append(gecko_thread.to_dict())

        gecko_meta = GeckoMeta(startTime=start_ms)

        gecko_profile = GeckoProfile(
            meta=gecko_meta.to_dict(),
            libs=[],
            pages=[],
            pausedRanges=[],
            threads=threads,
            processes=[],
        )
        return gecko_profile.to_dict()

0 commit comments

Comments
 (0)