Skip to content

Commit f7907f3

Browse files
committed
Add profiler to track generation paths
1 parent 06b4e2c commit f7907f3

File tree

2 files changed

+285
-1
lines changed

2 files changed

+285
-1
lines changed

rpython/jit/codewriter/genextension.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
from rpython.rtyper.lltypesystem import lltype, llmemory, rstr
1212
from rpython.rtyper.rclass import OBJECTPTR
1313
from rpython.rlib import objectmodel
14+
from rpython.jit.codewriter.genextprof import (
15+
SLOWPATH_PROFILE_ENABLED, get_profiler, classify_opcode,
16+
REASON_NO_UNSPEC_METHOD, REASON_NO_SPEC_METHOD,
17+
REASON_UNSUPPORTED_SPEC, REASON_UNSUPPORTED_UNSPEC,
18+
REASON_SPEC_RETURNED_NONE, REASON_UNSPEC_RETURNED_NONE,
19+
REASON_IS_CALL, REASON_IS_GUARD, REASON_IS_MEMORY_OP,
20+
REASON_HAS_LABEL_ARG, REASON_IS_LIVE_OP, REASON_NEWFRAME,
21+
REASON_FAST_PATH
22+
)
1423

1524
HEAPCACHE_SKIP_OPS = frozenset([
1625
# Integer binary operations
@@ -575,7 +584,11 @@ def make_code(self):
575584
code_and_spec_per_pc = {}
576585
while self.todo:
577586
spec = self.todo.popleft()
578-
code_and_spec_per_pc[spec.spec_pc] = spec.make_code(), spec
587+
if SLOWPATH_PROFILE_ENABLED:
588+
code = spec.make_code_with_profiling()
589+
else:
590+
code = spec.make_code()
591+
code_and_spec_per_pc[spec.spec_pc] = code, spec
579592
return code_and_spec_per_pc
580593

581594

@@ -653,6 +666,60 @@ def make_code(self):
653666
except Unsupported:
654667
return None
655668

669+
def make_code_with_profiling(self):
670+
profiler = get_profiler()
671+
opname = self.name.strip('-')
672+
args = self._get_args()
673+
674+
category = classify_opcode(self.name)
675+
if category == REASON_IS_LIVE_OP:
676+
return None
677+
678+
has_spec = hasattr(self, "emit_specialized_" + opname)
679+
has_unspec = hasattr(self, "emit_unspecialized_" + opname)
680+
profiler.record_method_info(opname, has_spec, has_unspec)
681+
682+
try:
683+
if not self._check_all_constant_args(args):
684+
# Non-constant args path
685+
if not has_unspec:
686+
# Determine more specific reason
687+
if category:
688+
profiler.record_codegen(opname, category)
689+
else:
690+
profiler.record_codegen(opname, REASON_NO_UNSPEC_METHOD)
691+
return None
692+
result = self._make_code_unspecialized()
693+
if result is None:
694+
profiler.record_codegen(opname, REASON_UNSPEC_RETURNED_NONE)
695+
else:
696+
profiler.record_codegen(opname, REASON_FAST_PATH)
697+
return result
698+
else:
699+
if has_spec:
700+
try:
701+
result = self._make_code_specialized()
702+
if result is None:
703+
profiler.record_codegen(opname, REASON_SPEC_RETURNED_NONE)
704+
else:
705+
profiler.record_codegen(opname, REASON_FAST_PATH)
706+
return result
707+
except Unsupported:
708+
profiler.record_codegen(opname, REASON_UNSUPPORTED_SPEC)
709+
return None
710+
else:
711+
if category:
712+
profiler.record_codegen(opname, category)
713+
else:
714+
profiler.record_codegen(opname, REASON_NO_SPEC_METHOD)
715+
return None
716+
except Unsupported:
717+
if self._check_all_constant_args(args):
718+
profiler.record_codegen(opname, REASON_UNSUPPORTED_SPEC)
719+
else:
720+
profiler.record_codegen(opname, REASON_UNSUPPORTED_UNSPEC)
721+
return None
722+
656723
def _is_label(self, arg):
657724
return isinstance(arg, Label) or isinstance(arg, TLabel)
658725

@@ -1193,6 +1260,10 @@ def emit_specialized_goto(self):
11931260
self._emit_jump(lines, label_pc)
11941261
return lines
11951262

1263+
# Unspecialized goto - label is always a compile-time constant,
1264+
# but this provides robustness for edge cases
1265+
emit_unspecialized_goto = emit_specialized_goto
1266+
11961267
def emit_specialized_goto_if_not_absolute(self, name, symbol_fmt):
11971268
if symbol_fmt == '':
11981269
symbol_fmt == '%s'
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
"""
2+
Slow-path profiler for GenExtension code generation.
3+
4+
This module instruments the JIT extension code generator to track why operations
5+
go through the slow path instead of fast-path code generation.
6+
7+
Enable with environment variable: PYPY_SLOWPATH_PROFILE=1
8+
9+
Reason codes:
10+
NO_UNSPEC_METHOD - No emit_unspecialized_<name> method exists
11+
NO_SPEC_METHOD - No emit_specialized_<name> method exists (constant args)
12+
UNSUPPORTED_SPEC - Specialized method raised Unsupported exception
13+
UNSUPPORTED_UNSPEC - Unspecialized method raised Unsupported exception
14+
SPEC_RETURNED_NONE - Specialized method returned None
15+
UNSPEC_RETURNED_NONE- Unspecialized method returned None
16+
IS_CALL - Operation is a call/inline_call
17+
IS_GUARD - Operation is a guard
18+
IS_MEMORY_OP - Operation is getfield/setfield/getarray/setarray
19+
HAS_LABEL_ARG - Operation has label argument (control flow)
20+
IS_LIVE_OP - Operation is -live- marker
21+
NEWFRAME - Operation requires new frame
22+
"""
23+
24+
import os
25+
from collections import defaultdict
26+
import atexit
27+
28+
# Enable profiling via environment variable
29+
SLOWPATH_PROFILE_ENABLED = os.environ.get('PYPY_SLOWPATH_PROFILE', '') == '1'
30+
SLOWPATH_PROFILE_VERBOSE = os.environ.get('PYPY_SLOWPATH_PROFILE_VERBOSE', '') == '1'
31+
32+
# Reason codes
33+
REASON_NO_UNSPEC_METHOD = 'NO_UNSPEC_METHOD'
34+
REASON_NO_SPEC_METHOD = 'NO_SPEC_METHOD'
35+
REASON_UNSUPPORTED_SPEC = 'UNSUPPORTED_SPEC'
36+
REASON_UNSUPPORTED_UNSPEC = 'UNSUPPORTED_UNSPEC'
37+
REASON_SPEC_RETURNED_NONE = 'SPEC_RETURNED_NONE'
38+
REASON_UNSPEC_RETURNED_NONE = 'UNSPEC_RETURNED_NONE'
39+
REASON_IS_CALL = 'IS_CALL'
40+
REASON_IS_GUARD = 'IS_GUARD'
41+
REASON_IS_MEMORY_OP = 'IS_MEMORY_OP'
42+
REASON_HAS_LABEL_ARG = 'HAS_LABEL_ARG'
43+
REASON_IS_LIVE_OP = 'IS_LIVE_OP'
44+
REASON_NEWFRAME = 'NEWFRAME'
45+
REASON_FAST_PATH = 'FAST_PATH' # Successfully used fast path
46+
47+
48+
class SlowPathProfiler(object):
49+
"""Tracks slow-path reasons during code generation."""
50+
51+
_instance = None
52+
53+
def __init__(self):
54+
# (opname, reason) -> count at codegen time
55+
self.codegen_counts = defaultdict(int)
56+
# (opname, reason) -> runtime execution count
57+
self.runtime_counts = defaultdict(int)
58+
# Track unique opcodes seen
59+
self.all_opcodes = set()
60+
# Track which opcodes have which methods
61+
self.opcode_method_info = {} # opname -> {'has_spec': bool, 'has_unspec': bool}
62+
63+
@classmethod
64+
def get_instance(cls):
65+
if cls._instance is None:
66+
cls._instance = cls()
67+
if SLOWPATH_PROFILE_ENABLED:
68+
atexit.register(cls._instance.print_report)
69+
return cls._instance
70+
71+
def record_codegen(self, opname, reason, count=1):
72+
"""Record a slow-path reason during code generation."""
73+
if not SLOWPATH_PROFILE_ENABLED:
74+
return
75+
self.codegen_counts[(opname, reason)] += count
76+
self.all_opcodes.add(opname)
77+
if SLOWPATH_PROFILE_VERBOSE:
78+
print("[SLOWPATH] codegen: %s -> %s" % (opname, reason))
79+
80+
def record_method_info(self, opname, has_specialized, has_unspecialized):
81+
"""Record which methods exist for an opcode."""
82+
if not SLOWPATH_PROFILE_ENABLED:
83+
return
84+
self.opcode_method_info[opname] = {
85+
'has_spec': has_specialized,
86+
'has_unspec': has_unspecialized
87+
}
88+
89+
def increment_runtime(self, opname, reason):
90+
"""Called at runtime to increment execution counter."""
91+
self.runtime_counts[(opname, reason)] += 1
92+
93+
def get_runtime_increment_code(self, opname, reason):
94+
"""Generate code that increments runtime counter."""
95+
if not SLOWPATH_PROFILE_ENABLED:
96+
return []
97+
return [
98+
"from rpython.jit.codewriter.slowpath_profiler import SlowPathProfiler",
99+
"SlowPathProfiler.get_instance().increment_runtime(%r, %r)" % (opname, reason)
100+
]
101+
102+
def print_report(self):
103+
"""Print the slow-path analysis report."""
104+
if not self.codegen_counts and not self.runtime_counts:
105+
return
106+
107+
print("\n" + "=" * 80)
108+
print("SLOW-PATH PROFILER REPORT")
109+
print("=" * 80)
110+
111+
# Aggregate by reason
112+
reason_totals = defaultdict(int)
113+
for (opname, reason), count in self.codegen_counts.items():
114+
reason_totals[reason] += count
115+
116+
total_ops = sum(self.codegen_counts.values())
117+
fast_path_count = reason_totals.get(REASON_FAST_PATH, 0)
118+
slow_path_count = total_ops - fast_path_count
119+
120+
print("\n--- SUMMARY ---")
121+
print("Total operations analyzed: %d" % total_ops)
122+
print("Fast path operations: %d (%.1f%%)" % (
123+
fast_path_count, 100.0 * fast_path_count / total_ops if total_ops else 0))
124+
print("Slow path operations: %d (%.1f%%)" % (
125+
slow_path_count, 100.0 * slow_path_count / total_ops if total_ops else 0))
126+
127+
print("\n--- SLOW-PATH BREAKDOWN BY REASON ---")
128+
print("%-25s %10s %8s" % ("Reason", "Count", "Share%"))
129+
print("-" * 45)
130+
for reason, count in sorted(reason_totals.items(), key=lambda x: -x[1]):
131+
if reason == REASON_FAST_PATH:
132+
continue
133+
pct = 100.0 * count / slow_path_count if slow_path_count else 0
134+
print("%-25s %10d %7.1f%%" % (reason, count, pct))
135+
136+
print("\n--- TOP 30 SLOW-PATH (OPCODE, REASON) PAIRS ---")
137+
print("%-35s %-25s %10s %8s" % ("Opcode", "Reason", "Count", "Share%"))
138+
print("-" * 80)
139+
140+
# Filter out fast path and sort by count
141+
slow_pairs = [(k, v) for k, v in self.codegen_counts.items()
142+
if k[1] != REASON_FAST_PATH]
143+
slow_pairs.sort(key=lambda x: -x[1])
144+
145+
for (opname, reason), count in slow_pairs[:30]:
146+
pct = 100.0 * count / slow_path_count if slow_path_count else 0
147+
print("%-35s %-25s %10d %7.1f%%" % (opname, reason, count, pct))
148+
149+
# Report surprising invariants
150+
self._report_invariants()
151+
152+
print("\n" + "=" * 80)
153+
154+
def _report_invariants(self):
155+
"""Report surprising patterns in the data."""
156+
print("\n--- SURPRISING INVARIANTS ---")
157+
158+
# Find opcodes that have both fast and slow path hits
159+
opcode_reasons = defaultdict(set)
160+
for (opname, reason), count in self.codegen_counts.items():
161+
if count > 0:
162+
opcode_reasons[opname].add(reason)
163+
164+
mixed_opcodes = []
165+
for opname, reasons in opcode_reasons.items():
166+
if REASON_FAST_PATH in reasons and len(reasons) > 1:
167+
mixed_opcodes.append((opname, reasons - {REASON_FAST_PATH}))
168+
169+
if mixed_opcodes:
170+
print("\nOpcodes with BOTH fast and slow path (check for missed optimization):")
171+
for opname, slow_reasons in sorted(mixed_opcodes):
172+
fast_count = self.codegen_counts.get((opname, REASON_FAST_PATH), 0)
173+
slow_count = sum(self.codegen_counts.get((opname, r), 0) for r in slow_reasons)
174+
print(" %s: fast=%d, slow=%d (reasons: %s)" % (
175+
opname, fast_count, slow_count, ', '.join(sorted(slow_reasons))))
176+
177+
# Find opcodes with unspecialized method that still go slow due to UNSUPPORTED
178+
unsupported_with_method = []
179+
for opname, info in self.opcode_method_info.items():
180+
if info.get('has_unspec'):
181+
count = self.codegen_counts.get((opname, REASON_UNSUPPORTED_UNSPEC), 0)
182+
if count > 0:
183+
unsupported_with_method.append((opname, count))
184+
185+
if unsupported_with_method:
186+
print("\nOpcodes with unspecialized method that raised Unsupported:")
187+
for opname, count in sorted(unsupported_with_method, key=lambda x: -x[1])[:10]:
188+
print(" %s: %d times" % (opname, count))
189+
190+
191+
def classify_opcode(opname):
192+
"""Classify an opcode into categories for analysis."""
193+
if opname.startswith('-'):
194+
if opname == '-live-':
195+
return REASON_IS_LIVE_OP
196+
return None
197+
198+
if 'call' in opname or 'inline_call' in opname:
199+
return REASON_IS_CALL
200+
201+
if 'guard' in opname:
202+
return REASON_IS_GUARD
203+
204+
if any(x in opname for x in ['getfield', 'setfield', 'getarrayitem', 'setarrayitem',
205+
'getinteriorfield', 'setinteriorfield', 'raw_load', 'raw_store']):
206+
return REASON_IS_MEMORY_OP
207+
208+
return None
209+
210+
211+
def get_profiler():
212+
"""Get the singleton profiler instance."""
213+
return SlowPathProfiler.get_instance()

0 commit comments

Comments
 (0)