diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e19fb..a9432401525ebb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define _STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 +#define _STORE_FAST 524 +#define _STORE_FAST_0 525 +#define _STORE_FAST_1 526 +#define _STORE_FAST_2 527 +#define _STORE_FAST_3 528 +#define _STORE_FAST_4 529 +#define _STORE_FAST_5 530 +#define _STORE_FAST_6 531 +#define _STORE_FAST_7 532 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define _STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 533 +#define _STORE_SUBSCR 534 +#define _STORE_SUBSCR_DICT 535 +#define _STORE_SUBSCR_LIST_INT 536 +#define _SWAP 537 +#define _SWAP_2 538 +#define _SWAP_3 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785817..52cbc2fffe484e 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = 0, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case _POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b6934d747ebe0..c5b5322725f986 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2349,6 +2349,46 @@ def testfunc(n): assert ex is not None """)) + def test_pop_top_specialize_none(self): + def testfunc(n): + for _ in range(n): + global_identity(None) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def test_pop_top_specialize_int(self): + def testfunc(n): + for _ in range(n): + global_identity(100000) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_INT", uops) + + def test_pop_top_specialize_float(self): + def testfunc(n): + for _ in range(n): + global_identity(1e6) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_FLOAT", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst new file mode 100644 index 00000000000000..715ac7dc9253d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst @@ -0,0 +1 @@ +Specialize :opcode:`POP_TOP` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38ccfcc..535e552e047475 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,27 @@ dummy_func( PyStackRef_XCLOSE(value); } + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550afe..46fc164a5b3bc2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,46 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b1a63e3d2916f..145a8c118d3612 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 2e240830013a46..cfdd091beca88d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -534,7 +534,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { @@ -542,7 +542,7 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { @@ -561,6 +561,24 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -803,7 +821,9 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3cabf619fe81e7..6d5770b87cf3d3 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -100,6 +100,47 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -784,7 +825,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2641,7 +2682,7 @@ case _LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2651,7 +2692,7 @@ case _POP_TOP_LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 64cc1b9074fcf0..c3d9e0e778bf55 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6ff0223d2ef3e7..6466d2615cd14e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc599..0bcdc5395dcd8e 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: