|
| 1 | +""" |
| 2 | +Slow-path profiler for GenExtension code generation. |
| 3 | +
|
| 4 | +This module instruments the JIT extension code generator to track why operations |
| 5 | +go through the slow path instead of fast-path code generation. |
| 6 | +
|
| 7 | +Enable with environment variable: PYPY_SLOWPATH_PROFILE=1 |
| 8 | +
|
| 9 | +Reason codes: |
| 10 | + NO_UNSPEC_METHOD - No emit_unspecialized_<name> method exists |
| 11 | + NO_SPEC_METHOD - No emit_specialized_<name> method exists (constant args) |
| 12 | + UNSUPPORTED_SPEC - Specialized method raised Unsupported exception |
| 13 | + UNSUPPORTED_UNSPEC - Unspecialized method raised Unsupported exception |
| 14 | + SPEC_RETURNED_NONE - Specialized method returned None |
| 15 | + UNSPEC_RETURNED_NONE- Unspecialized method returned None |
| 16 | + IS_CALL - Operation is a call/inline_call |
| 17 | + IS_GUARD - Operation is a guard |
| 18 | + IS_MEMORY_OP - Operation is getfield/setfield/getarray/setarray |
| 19 | + HAS_LABEL_ARG - Operation has label argument (control flow) |
| 20 | + IS_LIVE_OP - Operation is -live- marker |
| 21 | + NEWFRAME - Operation requires new frame |
| 22 | +""" |
| 23 | + |
| 24 | +import os |
| 25 | +from collections import defaultdict |
| 26 | +import atexit |
| 27 | + |
| 28 | +# Enable profiling via environment variable |
| 29 | +SLOWPATH_PROFILE_ENABLED = os.environ.get('PYPY_SLOWPATH_PROFILE', '') == '1' |
| 30 | +SLOWPATH_PROFILE_VERBOSE = os.environ.get('PYPY_SLOWPATH_PROFILE_VERBOSE', '') == '1' |
| 31 | + |
| 32 | +# Reason codes |
| 33 | +REASON_NO_UNSPEC_METHOD = 'NO_UNSPEC_METHOD' |
| 34 | +REASON_NO_SPEC_METHOD = 'NO_SPEC_METHOD' |
| 35 | +REASON_UNSUPPORTED_SPEC = 'UNSUPPORTED_SPEC' |
| 36 | +REASON_UNSUPPORTED_UNSPEC = 'UNSUPPORTED_UNSPEC' |
| 37 | +REASON_SPEC_RETURNED_NONE = 'SPEC_RETURNED_NONE' |
| 38 | +REASON_UNSPEC_RETURNED_NONE = 'UNSPEC_RETURNED_NONE' |
| 39 | +REASON_IS_CALL = 'IS_CALL' |
| 40 | +REASON_IS_GUARD = 'IS_GUARD' |
| 41 | +REASON_IS_MEMORY_OP = 'IS_MEMORY_OP' |
| 42 | +REASON_HAS_LABEL_ARG = 'HAS_LABEL_ARG' |
| 43 | +REASON_IS_LIVE_OP = 'IS_LIVE_OP' |
| 44 | +REASON_NEWFRAME = 'NEWFRAME' |
| 45 | +REASON_FAST_PATH = 'FAST_PATH' # Successfully used fast path |
| 46 | + |
| 47 | + |
| 48 | +class SlowPathProfiler(object): |
| 49 | + """Tracks slow-path reasons during code generation.""" |
| 50 | + |
| 51 | + _instance = None |
| 52 | + |
| 53 | + def __init__(self): |
| 54 | + # (opname, reason) -> count at codegen time |
| 55 | + self.codegen_counts = defaultdict(int) |
| 56 | + # (opname, reason) -> runtime execution count |
| 57 | + self.runtime_counts = defaultdict(int) |
| 58 | + # Track unique opcodes seen |
| 59 | + self.all_opcodes = set() |
| 60 | + # Track which opcodes have which methods |
| 61 | + self.opcode_method_info = {} # opname -> {'has_spec': bool, 'has_unspec': bool} |
| 62 | + |
| 63 | + @classmethod |
| 64 | + def get_instance(cls): |
| 65 | + if cls._instance is None: |
| 66 | + cls._instance = cls() |
| 67 | + if SLOWPATH_PROFILE_ENABLED: |
| 68 | + atexit.register(cls._instance.print_report) |
| 69 | + return cls._instance |
| 70 | + |
| 71 | + def record_codegen(self, opname, reason, count=1): |
| 72 | + """Record a slow-path reason during code generation.""" |
| 73 | + if not SLOWPATH_PROFILE_ENABLED: |
| 74 | + return |
| 75 | + self.codegen_counts[(opname, reason)] += count |
| 76 | + self.all_opcodes.add(opname) |
| 77 | + if SLOWPATH_PROFILE_VERBOSE: |
| 78 | + print("[SLOWPATH] codegen: %s -> %s" % (opname, reason)) |
| 79 | + |
| 80 | + def record_method_info(self, opname, has_specialized, has_unspecialized): |
| 81 | + """Record which methods exist for an opcode.""" |
| 82 | + if not SLOWPATH_PROFILE_ENABLED: |
| 83 | + return |
| 84 | + self.opcode_method_info[opname] = { |
| 85 | + 'has_spec': has_specialized, |
| 86 | + 'has_unspec': has_unspecialized |
| 87 | + } |
| 88 | + |
| 89 | + def increment_runtime(self, opname, reason): |
| 90 | + """Called at runtime to increment execution counter.""" |
| 91 | + self.runtime_counts[(opname, reason)] += 1 |
| 92 | + |
| 93 | + def get_runtime_increment_code(self, opname, reason): |
| 94 | + """Generate code that increments runtime counter.""" |
| 95 | + if not SLOWPATH_PROFILE_ENABLED: |
| 96 | + return [] |
| 97 | + return [ |
| 98 | + "from rpython.jit.codewriter.slowpath_profiler import SlowPathProfiler", |
| 99 | + "SlowPathProfiler.get_instance().increment_runtime(%r, %r)" % (opname, reason) |
| 100 | + ] |
| 101 | + |
| 102 | + def print_report(self): |
| 103 | + """Print the slow-path analysis report.""" |
| 104 | + if not self.codegen_counts and not self.runtime_counts: |
| 105 | + return |
| 106 | + |
| 107 | + print("\n" + "=" * 80) |
| 108 | + print("SLOW-PATH PROFILER REPORT") |
| 109 | + print("=" * 80) |
| 110 | + |
| 111 | + # Aggregate by reason |
| 112 | + reason_totals = defaultdict(int) |
| 113 | + for (opname, reason), count in self.codegen_counts.items(): |
| 114 | + reason_totals[reason] += count |
| 115 | + |
| 116 | + total_ops = sum(self.codegen_counts.values()) |
| 117 | + fast_path_count = reason_totals.get(REASON_FAST_PATH, 0) |
| 118 | + slow_path_count = total_ops - fast_path_count |
| 119 | + |
| 120 | + print("\n--- SUMMARY ---") |
| 121 | + print("Total operations analyzed: %d" % total_ops) |
| 122 | + print("Fast path operations: %d (%.1f%%)" % ( |
| 123 | + fast_path_count, 100.0 * fast_path_count / total_ops if total_ops else 0)) |
| 124 | + print("Slow path operations: %d (%.1f%%)" % ( |
| 125 | + slow_path_count, 100.0 * slow_path_count / total_ops if total_ops else 0)) |
| 126 | + |
| 127 | + print("\n--- SLOW-PATH BREAKDOWN BY REASON ---") |
| 128 | + print("%-25s %10s %8s" % ("Reason", "Count", "Share%")) |
| 129 | + print("-" * 45) |
| 130 | + for reason, count in sorted(reason_totals.items(), key=lambda x: -x[1]): |
| 131 | + if reason == REASON_FAST_PATH: |
| 132 | + continue |
| 133 | + pct = 100.0 * count / slow_path_count if slow_path_count else 0 |
| 134 | + print("%-25s %10d %7.1f%%" % (reason, count, pct)) |
| 135 | + |
| 136 | + print("\n--- TOP 30 SLOW-PATH (OPCODE, REASON) PAIRS ---") |
| 137 | + print("%-35s %-25s %10s %8s" % ("Opcode", "Reason", "Count", "Share%")) |
| 138 | + print("-" * 80) |
| 139 | + |
| 140 | + # Filter out fast path and sort by count |
| 141 | + slow_pairs = [(k, v) for k, v in self.codegen_counts.items() |
| 142 | + if k[1] != REASON_FAST_PATH] |
| 143 | + slow_pairs.sort(key=lambda x: -x[1]) |
| 144 | + |
| 145 | + for (opname, reason), count in slow_pairs[:30]: |
| 146 | + pct = 100.0 * count / slow_path_count if slow_path_count else 0 |
| 147 | + print("%-35s %-25s %10d %7.1f%%" % (opname, reason, count, pct)) |
| 148 | + |
| 149 | + # Report surprising invariants |
| 150 | + self._report_invariants() |
| 151 | + |
| 152 | + print("\n" + "=" * 80) |
| 153 | + |
| 154 | + def _report_invariants(self): |
| 155 | + """Report surprising patterns in the data.""" |
| 156 | + print("\n--- SURPRISING INVARIANTS ---") |
| 157 | + |
| 158 | + # Find opcodes that have both fast and slow path hits |
| 159 | + opcode_reasons = defaultdict(set) |
| 160 | + for (opname, reason), count in self.codegen_counts.items(): |
| 161 | + if count > 0: |
| 162 | + opcode_reasons[opname].add(reason) |
| 163 | + |
| 164 | + mixed_opcodes = [] |
| 165 | + for opname, reasons in opcode_reasons.items(): |
| 166 | + if REASON_FAST_PATH in reasons and len(reasons) > 1: |
| 167 | + mixed_opcodes.append((opname, reasons - {REASON_FAST_PATH})) |
| 168 | + |
| 169 | + if mixed_opcodes: |
| 170 | + print("\nOpcodes with BOTH fast and slow path (check for missed optimization):") |
| 171 | + for opname, slow_reasons in sorted(mixed_opcodes): |
| 172 | + fast_count = self.codegen_counts.get((opname, REASON_FAST_PATH), 0) |
| 173 | + slow_count = sum(self.codegen_counts.get((opname, r), 0) for r in slow_reasons) |
| 174 | + print(" %s: fast=%d, slow=%d (reasons: %s)" % ( |
| 175 | + opname, fast_count, slow_count, ', '.join(sorted(slow_reasons)))) |
| 176 | + |
| 177 | + # Find opcodes with unspecialized method that still go slow due to UNSUPPORTED |
| 178 | + unsupported_with_method = [] |
| 179 | + for opname, info in self.opcode_method_info.items(): |
| 180 | + if info.get('has_unspec'): |
| 181 | + count = self.codegen_counts.get((opname, REASON_UNSUPPORTED_UNSPEC), 0) |
| 182 | + if count > 0: |
| 183 | + unsupported_with_method.append((opname, count)) |
| 184 | + |
| 185 | + if unsupported_with_method: |
| 186 | + print("\nOpcodes with unspecialized method that raised Unsupported:") |
| 187 | + for opname, count in sorted(unsupported_with_method, key=lambda x: -x[1])[:10]: |
| 188 | + print(" %s: %d times" % (opname, count)) |
| 189 | + |
| 190 | + |
| 191 | +def classify_opcode(opname): |
| 192 | + """Classify an opcode into categories for analysis.""" |
| 193 | + if opname.startswith('-'): |
| 194 | + if opname == '-live-': |
| 195 | + return REASON_IS_LIVE_OP |
| 196 | + return None |
| 197 | + |
| 198 | + if 'call' in opname or 'inline_call' in opname: |
| 199 | + return REASON_IS_CALL |
| 200 | + |
| 201 | + if 'guard' in opname: |
| 202 | + return REASON_IS_GUARD |
| 203 | + |
| 204 | + if any(x in opname for x in ['getfield', 'setfield', 'getarrayitem', 'setarrayitem', |
| 205 | + 'getinteriorfield', 'setinteriorfield', 'raw_load', 'raw_store']): |
| 206 | + return REASON_IS_MEMORY_OP |
| 207 | + |
| 208 | + return None |
| 209 | + |
| 210 | + |
| 211 | +def get_profiler(): |
| 212 | + """Get the singleton profiler instance.""" |
| 213 | + return SlowPathProfiler.get_instance() |
0 commit comments