@@ -90,20 +90,17 @@ def generate(self):
9090 self ._make_code (self .pc_to_index [spec .orig_pc ], spec .insn )
9191 code_and_spec_per_pc [pc ] = (str (py .code .Source ("\n " .join (self .code )).deindent ()), spec )
9292
93- # Analyze PC transitions for trace prediction and threaded code
94- pc_transitions = self ._analyze_pc_transitions (code_and_spec_per_pc )
95-
9693 self .code = []
9794 allconsts = set ()
98- num_pcs = len (code_and_spec_per_pc )
99- use_dict_dispatch = num_pcs > DICT_DISPATCH_THRESHOLD
10095
101- if use_dict_dispatch :
102- # Dictionary-based dispatch for large functions - O(1) lookup
103- self ._generate_dict_dispatch (code_and_spec_per_pc , allconsts , pc_transitions )
104- else :
105- # Optimized if/elif dispatch with trace prediction for small functions
106- self ._generate_threaded_dispatch (code_and_spec_per_pc , allconsts , pc_transitions )
96+ # Generate simple if/elif dispatch
97+ for pc , (code , spec ) in sorted (code_and_spec_per_pc .iteritems ()):
98+ allconsts .update (spec .constant_registers )
99+ self .code .append ("if pc == %s: # %s %s" % (pc , spec .insn , spec .constant_registers ))
100+ self .code .append (" self.pc = %s" % (self .pc_to_nextpc [spec .orig_pc ], ))
101+ for line in str (py .code .Source (code ).indent (' ' )).splitlines ():
102+ self .code .append (line )
103+ self .code .append ("assert 0 # unreachable" )
107104
108105 allcode = []
109106 allconsts = sorted (["%s%s" % (val .kind [0 ], val .index ) for val in allconsts ])
@@ -139,121 +136,6 @@ def generate(self):
139136 self .jitcode .genext_function = d ['jit_shortcut' ]
140137 self .jitcode .genext_function .__name__ += "_" + self .jitcode .name
141138
142- def _analyze_pc_transitions (self , code_and_spec_per_pc ):
143- """
144- Analyze PC transitions to find:
145- 1. Linear chains (pc A always goes to pc B)
146- 2. Most likely next PC for each block (for trace prediction)
147- Returns a dict mapping pc -> (likely_next_pc, is_unconditional)
148- """
149- transitions = {}
150- for pc , (code , spec ) in code_and_spec_per_pc .iteritems ():
151- if code is None :
152- continue
153- # Parse the code to find 'pc = X' assignments
154- next_pcs = []
155- lines = code .split ('\n ' )
156- for line in lines :
157- line = line .strip ()
158- if line .startswith ('pc = ' ) and 'self.pc' not in line :
159- try :
160- next_pc = int (line .split ('=' )[1 ].strip ())
161- next_pcs .append (next_pc )
162- except (ValueError , IndexError ):
163- pass
164- if len (next_pcs ) == 1 :
165- # Unconditional transition - perfect for threaded code
166- transitions [pc ] = (next_pcs [0 ], True )
167- elif next_pcs :
168- # Multiple targets - use first as likely prediction
169- transitions [pc ] = (next_pcs [0 ], False )
170- return transitions
171-
172- def _generate_dict_dispatch (self , code_and_spec_per_pc , allconsts , pc_transitions ):
173- """
174- Generate dictionary-based dispatch for O(1) PC lookup.
175- This is more efficient for functions with many PC values.
176-
177- For large functions, we use a true dictionary-based dispatch where
178- each PC handler is a closure that updates pc and continues.
179- """
180- # Build the dispatch cases
181- dispatch_cases = []
182- for pc , (code , spec ) in sorted (code_and_spec_per_pc .iteritems ()):
183- allconsts .update (spec .constant_registers )
184- case_lines = []
185- case_lines .append ("self.pc = %s" % (self .pc_to_nextpc [spec .orig_pc ], ))
186- for line in str (py .code .Source (code ).indent ('' )).splitlines ():
187- case_lines .append (line )
188- dispatch_cases .append ((pc , case_lines , spec ))
189-
190- # Generate the dispatch code - still use if/elif but with optimized ordering
191- # based on trace prediction (most likely targets first)
192- for pc , case_lines , spec in dispatch_cases :
193- self .code .append ("if pc == %s: # %s %s" % (pc , spec .insn , spec .constant_registers ))
194- for line in case_lines :
195- self .code .append (" " + line )
196-
197- self .code .append ("assert 0 # unreachable" )
198-
199- def _generate_threaded_dispatch (self , code_and_spec_per_pc , allconsts , pc_transitions ):
200- """
201- Generate threaded code dispatch with trace prediction.
202- For blocks with unconditional transitions, inline the next block
203- to avoid dispatch overhead (direct threading).
204-
205- This optimization:
206- 1. Finds chains of blocks with unconditional transitions
207- 2. Inlines successive blocks to avoid dispatch overhead
208- 3. Preserves the original output format for compatibility
209- """
210- # For now, generate standard dispatch to preserve test compatibility
211- # The threaded optimization happens at a lower level via _try_inline_chain
212- for pc , (code , spec ) in sorted (code_and_spec_per_pc .iteritems ()):
213- allconsts .update (spec .constant_registers )
214- self .code .append ("if pc == %s: # %s %s" % (pc , spec .insn , spec .constant_registers ))
215- self .code .append (" self.pc = %s" % (self .pc_to_nextpc [spec .orig_pc ], ))
216- for line in str (py .code .Source (code ).indent (' ' )).splitlines ():
217- self .code .append (line )
218- self .code .append ("assert 0 # unreachable" )
219-
220- def _find_threaded_chains (self , sorted_pcs , pc_transitions , code_and_spec_per_pc ):
221- """
222- Find chains of PCs that can be threaded together (inlined).
223- A chain is a sequence of blocks where each unconditionally jumps to the next.
224- """
225- chains = {}
226- in_chain = set ()
227-
228- for start_pc in sorted_pcs :
229- if start_pc in in_chain :
230- continue
231-
232- chain = [start_pc ]
233- current_pc = start_pc
234-
235- # Follow unconditional transitions
236- while current_pc in pc_transitions :
237- next_pc , is_unconditional = pc_transitions [current_pc ]
238- if not is_unconditional :
239- break
240- if next_pc in in_chain or next_pc == start_pc :
241- break # Avoid cycles and already-chained blocks
242- if next_pc not in code_and_spec_per_pc :
243- break
244- chain .append (next_pc )
245- in_chain .add (next_pc )
246- current_pc = next_pc
247- # Limit chain length to prevent excessive inlining
248- if len (chain ) >= MAX_INLINE_DEPTH :
249- break
250-
251- if len (chain ) > 1 :
252- chains [start_pc ] = chain
253- in_chain .add (start_pc )
254-
255- return chains
256-
257139 def _make_code (self , index , insn ):
258140 self ._reset_insn ()
259141 assert not (isinstance (insn [0 ], Label ) or insn [0 ] == '---' )
@@ -643,9 +525,6 @@ def next_possible_pcs(self, insn, needed_label, nextpc):
643525# Maximum depth for inlining to prevent code explosion
644526MAX_INLINE_DEPTH = 16
645527
646- # Threshold for switching to dictionary-based dispatch
647- DICT_DISPATCH_THRESHOLD = 25
648-
649528# Maximum loop body size for unrolling consideration
650529MAX_UNROLL_LOOP_SIZE = 3
651530
@@ -691,70 +570,6 @@ def next_possible_pcs(self, insn, needed_label, nextpc):
691570 (r'int\((\w+) == (\w+)\) != 0' , r'\1 == \2' ), # int(x == y) != 0 -> x == y
692571]
693572
class ValueNumbering(object):
    """
    Simple value numbering for common subexpression elimination.

    Tracks which expressions have been computed into which register, and
    which registers are plain copies of others (copy propagation).
    """

    def __init__(self):
        self.expr_to_value = {}  # (op, arg1, arg2, ...) -> result_register
        self.value_to_expr = {}  # result_register -> (op, arg1, arg2, ...)
        self.aliases = {}        # register -> canonical_register

    def copy(self):
        """Return an independent copy, e.g. for use on a branching path."""
        new = ValueNumbering()
        new.expr_to_value = self.expr_to_value.copy()
        new.value_to_expr = self.value_to_expr.copy()
        new.aliases = self.aliases.copy()
        return new

    def add_alias(self, src, dst):
        """Record that dst is a copy of src."""
        self.aliases[dst] = self.get_canonical(src)

    def get_canonical(self, reg):
        """Return the canonical register for reg by following aliases."""
        visited = set()
        # 'visited' guards against looping forever on a cyclic alias map.
        while reg in self.aliases and reg not in visited:
            visited.add(reg)
            reg = self.aliases[reg]
        return reg

    def _make_key(self, op, args):
        """Build the lookup key for op(args), canonicalizing registers.

        Arguments that are ints, floats or Constant instances are kept
        as they are; everything else is treated as a register.
        """
        canonical_args = tuple(
            a if isinstance(a, (int, float, Constant)) else self.get_canonical(a)
            for a in args)
        return (op,) + canonical_args

    def lookup_expr(self, op, *args):
        """Return the register already holding op(args), or None."""
        return self.expr_to_value.get(self._make_key(op, args))

    def record_expr(self, op, result, *args):
        """Record that result = op(args)."""
        key = self._make_key(op, args)
        self.expr_to_value[key] = result
        self.value_to_expr[result] = key

    def invalidate(self, reg):
        """Forget everything known about reg (call when it is reassigned).

        Removes reg's own alias, every alias that names reg as its
        canonical register, and every recorded expression that produces
        or consumes reg. Expressions involving the register reg was a
        copy of are dropped as well, which is conservative.
        """
        canonical = self.get_canonical(reg)
        stale = (reg, canonical)

        self.aliases.pop(reg, None)
        # Copies of reg still hold the old value, so they are no longer
        # equivalent to the reassigned register.
        for alias, target in list(self.aliases.items()):
            if target == reg:
                del self.aliases[alias]

        for key, result in list(self.expr_to_value.items()):
            operands = key[1:]  # key[0] is the operation name
            if result in stale or any(r in operands for r in stale):
                del self.expr_to_value[key]
                # Keep the reverse map in sync with the forward map.
                if self.value_to_expr.get(result) == key:
                    del self.value_to_expr[result]

        for r in stale:
            self.value_to_expr.pop(r, None)
757-
758573def _apply_peephole_optimizations (code ):
759574 """
760575 Apply peephole optimizations to generated Python code.
@@ -1923,9 +1738,6 @@ def _try_inline_chain(self, lines, target_pc, constant_registers, indent):
19231738 prev_insn = None
19241739 prev_result = None
19251740
1926- # Value numbering for CSE and copy propagation
1927- value_numbering = ValueNumbering ()
1928-
19291741 while self .inline_depth + len (visited ) < MAX_INLINE_DEPTH :
19301742 if current_pc in visited :
19311743 # Loop detected! Try to generate a tight loop
@@ -1965,7 +1777,7 @@ def _try_inline_chain(self, lines, target_pc, constant_registers, indent):
19651777
19661778 # Try superinstruction optimization
19671779 superinsn_result = self ._try_superinstruction (
1968- prev_insn , insn , prev_result , current_const_regs , value_numbering , lines , indent )
1780+ prev_insn , insn , prev_result , current_const_regs , lines , indent )
19691781 if superinsn_result is not None :
19701782 # Superinstruction was applied
19711783 new_const_regs , new_result = superinsn_result
@@ -1997,11 +1809,6 @@ def _try_inline_chain(self, lines, target_pc, constant_registers, indent):
19971809 prev_insn = insn
19981810 if '->' in insn :
19991811 prev_result = insn [- 1 ]
2000- # Record in value numbering for CSE
2001- if len (insn ) >= 4 and insn [0 ] in ('int_add' , 'int_sub' , 'int_mul' , 'int_and' , 'int_or' , 'int_xor' ):
2002- value_numbering .record_expr (insn [0 ], prev_result , insn [1 ], insn [2 ])
2003- elif insn [0 ] in ('int_copy' , 'ref_copy' , 'float_copy' ):
2004- value_numbering .add_alias (insn [1 ], prev_result )
20051812 else :
20061813 prev_result = None
20071814
@@ -2028,7 +1835,7 @@ def _try_inline_chain(self, lines, target_pc, constant_registers, indent):
20281835
20291836 return False
20301837
2031- def _try_superinstruction (self , prev_insn , curr_insn , prev_result , const_regs , value_numbering , lines , indent ):
1838+ def _try_superinstruction (self , prev_insn , curr_insn , prev_result , const_regs , lines , indent ):
20321839 """
20331840 Try to apply superinstruction optimization for common patterns.
20341841 Returns (new_const_regs, result_reg) if applied, None otherwise.
0 commit comments