@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
 }
 
 int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
 {
-    assert(src->op.code == JUMP_BACKWARD);
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (!has_space_for_executor(code, src)) {
+    if (!has_space_for_executor(code, start)) {
         return 0;
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    /* Start optimizing at the destination to guarantee forward progress */
-    int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
    if (err <= 0) {
        assert(executor == NULL);
        return err;
    }
-    int index = get_index_for_executor(code, src);
+    int index = get_index_for_executor(code, start);
    if (index < 0) {
        /* Out of memory. Don't raise and assume that the
         * error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
         Py_DECREF(executor);
         return 0;
     }
-    insert_executor(code, src, index, executor);
+    insert_executor(code, start, index, executor);
     Py_DECREF(executor);
     return 1;
 }
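
The renamed entry point takes a single `start` instruction instead of the old (`src`, `dest`) pair; the forward-progress guarantee moves into the trace translator further down. A hedged sketch of how a call site might use it — `frame`, `here`, and the `error` label are illustrative stand-ins, not taken from this diff:

    /* Illustrative only: assumes `here` points at the instruction that
     * triggered optimization and `stack_pointer` is the current stack top. */
    int optimized = _PyOptimizer_Optimize(frame, here, stack_pointer);
    if (optimized < 0) {
        goto error;   /* -1: the optimizer raised an exception */
    }
    /* 0: no executor was installed; 1: an executor is now attached at `here`. */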
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333
 
-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
-    PyCodeObject *code,
-    _Py_CODEUNIT *instr,
-    _PyUOpInstruction *trace,
-    int buffer_size,
-    _PyBloomFilter *dependencies)
-{
-    PyCodeObject *initial_code = code;
-    _Py_BloomFilter_Add(dependencies, initial_code);
-    _Py_CODEUNIT *initial_instr = instr;
-    int trace_length = 0;
-    int max_length = buffer_size;
-    struct {
-        PyCodeObject *code;
-        _Py_CODEUNIT *instr;
-    } trace_stack[TRACE_STACK_SIZE];
-    int trace_stack_depth = 0;
-    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
-    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
-    int lltrace = 0;
-    if (python_lltrace != NULL && *python_lltrace >= '0') {
-        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
-    }
-#endif
-
 #ifdef Py_DEBUG
 #define DPRINTF(level, ...) \
     if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,13 +369,47 @@ translate_bytecode_to_trace(
     code = trace_stack[trace_stack_depth].code; \
     instr = trace_stack[trace_stack_depth].instr;
 
+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+    PyCodeObject *code,
+    _Py_CODEUNIT *instr,
+    _PyUOpInstruction *trace,
+    int buffer_size,
+    _PyBloomFilter *dependencies)
+{
+    bool progress_needed = true;
+    PyCodeObject *initial_code = code;
+    _Py_BloomFilter_Add(dependencies, initial_code);
+    _Py_CODEUNIT *initial_instr = instr;
+    int trace_length = 0;
+    int max_length = buffer_size;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
+    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+    int lltrace = 0;
+    if (python_lltrace != NULL && *python_lltrace >= '0') {
+        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
+    }
+#endif
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
             PyUnicode_AsUTF8(code->co_filename),
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
     uint32_t target = 0;
+
 top:  // Jump here after _PUSH_FRAME or likely branches
     for (;;) {
         target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ translate_bytecode_to_trace(
         uint32_t oparg = instr->op.arg;
         uint32_t extended = 0;
 
+        if (opcode == ENTER_EXECUTOR) {
+            assert(oparg < 256);
+            _PyExecutorObject *executor =
+                (_PyExecutorObject *)code->co_executors->executors[oparg];
+            opcode = executor->vm_data.opcode;
+            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
+            oparg = executor->vm_data.oparg;
+        }
+
         if (opcode == EXTENDED_ARG) {
             instr++;
             extended = 1;
@@ -431,13 +440,23 @@ translate_bytecode_to_trace(
                 goto done;
             }
         }
-
-        if (opcode == ENTER_EXECUTOR) {
-            _PyExecutorObject *executor =
-                (_PyExecutorObject *)code->co_executors->executors[oparg & 255];
-            opcode = executor->vm_data.opcode;
-            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
-            oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
+        assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+
+        /* Special case the first instruction,
+         * so that we can guarantee forward progress */
+        if (progress_needed) {
+            progress_needed = false;
+            if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+                instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+                initial_instr = instr;
+                continue;
+            }
+            else {
+                if (OPCODE_HAS_DEOPT(opcode)) {
+                    opcode = _PyOpcode_Deopt[opcode];
+                }
+                assert(!OPCODE_HAS_DEOPT(opcode));
+            }
         }
 
         switch (opcode) {
@@ -480,7 +499,9 @@ translate_bytecode_to_trace(
             case JUMP_BACKWARD:
             case JUMP_BACKWARD_NO_INTERRUPT:
             {
-                if (instr + 2 - oparg == initial_instr && code == initial_code) {
+                _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
+                if (target == initial_instr) {
+                    /* We have looped round to the start */
                     RESERVE(1);
                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
                 }
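
Both this hunk and the forward-progress special case above compute the backward-jump target the same way: skip the jump instruction itself plus its inline cache entries, then step back `oparg` code units. A minimal standalone sketch of that arithmetic; the helper name is hypothetical and not part of the diff:

    /* Sketch only: mirrors `instr + 1 + _PyOpcode_Caches[opcode] - oparg`
     * as used in this diff. */
    static _Py_CODEUNIT *
    backward_jump_target(_Py_CODEUNIT *instr, int opcode, int oparg)
    {
        assert(opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT);
        /* 1 code unit for the jump itself, plus its caches, minus the offset. */
        return instr + 1 + _PyOpcode_Caches[opcode] - oparg;
    }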
@@ -641,35 +662,33 @@ translate_bytecode_to_trace(
     }
     assert(code == initial_code);
     // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
-    if (trace_length > 4) {
-        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-        DPRINTF(1,
-                "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
-                PyUnicode_AsUTF8(code->co_qualname),
-                PyUnicode_AsUTF8(code->co_filename),
-                code->co_firstlineno,
-                2 * INSTR_IP(initial_instr, code),
-                trace_length);
-        OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
-        return 1;
-    }
-    else {
+    if (progress_needed || trace_length < 5) {
         OPT_STAT_INC(trace_too_short);
         DPRINTF(4,
                 "No trace for %s (%s:%d) at byte offset %d\n",
                 PyUnicode_AsUTF8(code->co_qualname),
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
                 2 * INSTR_IP(initial_instr, code));
+        return 0;
     }
-    return 0;
+    ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+    DPRINTF(1,
+            "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+            PyUnicode_AsUTF8(code->co_qualname),
+            PyUnicode_AsUTF8(code->co_filename),
+            code->co_firstlineno,
+            2 * INSTR_IP(initial_instr, code),
+            trace_length);
+    OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+    return 1;
+}
 
 #undef RESERVE
 #undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
-}
 
 #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
 #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
     int Py_UNUSED(curr_stackentries)
 )
 {
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    if (instr->op.code != JUMP_BACKWARD) {
+        /* Counter optimizer can only handle backward edges */
+        return 0;
+    }
+    _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
     _PyUOpInstruction buffer[3] = {
         { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
-        { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
+        { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
     };
     _PyBloomFilter empty;
     _Py_BloomFilter_Init(&empty);
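
With `JUMP_BACKWARD` handling now inside the optimizer, `counter_optimize` unrolls any `EXTENDED_ARG` prefixes itself before locating the loop head. A standalone sketch of that widening loop, with a hypothetical helper name:

    /* Sketch only: each EXTENDED_ARG prefix contributes 8 high bits of oparg,
     * exactly as in the counter_optimize hunk above. */
    static _Py_CODEUNIT *
    skip_extended_args(_Py_CODEUNIT *instr, int *oparg)
    {
        *oparg = instr->op.arg;
        while (instr->op.code == EXTENDED_ARG) {
            instr++;
            *oparg = (*oparg << 8) | instr->op.arg;
        }
        return instr;   /* the real (non-prefix) instruction */
    }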