diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 62325250bd368e..3349eb042425dd 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -57,9 +57,10 @@ jobs: fail-fast: false matrix: target: - - i686-pc-windows-msvc/msvc - - x86_64-pc-windows-msvc/msvc - - aarch64-pc-windows-msvc/msvc +# To re-enable later when we support these. +# - i686-pc-windows-msvc/msvc +# - x86_64-pc-windows-msvc/msvc +# - aarch64-pc-windows-msvc/msvc - x86_64-apple-darwin/clang - aarch64-apple-darwin/clang - x86_64-unknown-linux-gnu/gcc @@ -70,15 +71,16 @@ jobs: llvm: - 21 include: - - target: i686-pc-windows-msvc/msvc - architecture: Win32 - runner: windows-2022 - - target: x86_64-pc-windows-msvc/msvc - architecture: x64 - runner: windows-2022 - - target: aarch64-pc-windows-msvc/msvc - architecture: ARM64 - runner: windows-11-arm +# To re-enable later when we support these. +# - target: i686-pc-windows-msvc/msvc +# architecture: Win32 +# runner: windows-2022 +# - target: x86_64-pc-windows-msvc/msvc +# architecture: x64 +# runner: windows-2022 +# - target: aarch64-pc-windows-msvc/msvc +# architecture: ARM64 +# runner: windows-11-arm - target: x86_64-apple-darwin/clang architecture: x86_64 runner: macos-15-intel diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 5241533e11281f..d7fe9e2c9ec9b4 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -309,6 +309,14 @@ For convenience, some of these functions will always return a .. versionadded:: 3.4 +.. c:function:: void PyErr_RangedSyntaxLocationObject(PyObject *filename, int lineno, int col_offset, int end_lineno, int end_col_offset) + + Similar to :c:func:`PyErr_SyntaxLocationObject`, but also sets the + *end_lineno* and *end_col_offset* information for the current exception. + + .. versionadded:: 3.10 + + .. c:function:: void PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) Like :c:func:`PyErr_SyntaxLocationObject`, but *filename* is a byte string @@ -976,6 +984,9 @@ because the :ref:`call protocol ` takes care of recursion handling. be concatenated to the :exc:`RecursionError` message caused by the recursion depth limit. + .. seealso:: + The :c:func:`PyUnstable_ThreadState_SetStackProtection` function. + .. versionchanged:: 3.9 This function is now also available in the :ref:`limited API `. diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 49ffeab55850c0..18ee16118070eb 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1366,6 +1366,43 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. versionadded:: 3.11 +.. c:function:: int PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, void *stack_start_addr, size_t stack_size) + + Set the stack protection start address and stack protection size + of a Python thread state. + + On success, return ``0``. + On failure, set an exception and return ``-1``. + + CPython implements :ref:`recursion control ` for C code by raising + :py:exc:`RecursionError` when it notices that the machine execution stack is close + to overflow. See for example the :c:func:`Py_EnterRecursiveCall` function. + For this, it needs to know the location of the current thread's stack, which it + normally gets from the operating system. + When the stack is changed, for example using context switching techniques like the + Boost library's ``boost::context``, you must call + :c:func:`~PyUnstable_ThreadState_SetStackProtection` to inform CPython of the change. 
+ + Call :c:func:`~PyUnstable_ThreadState_SetStackProtection` either before + or after changing the stack. + Do not call any other Python C API between the call and the stack + change. + + See :c:func:`PyUnstable_ThreadState_ResetStackProtection` for undoing this operation. + + .. versionadded:: next + + +.. c:function:: void PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) + + Reset the stack protection start address and stack protection size + of a Python thread state to the operating system defaults. + + See :c:func:`PyUnstable_ThreadState_SetStackProtection` for an explanation. + + .. versionadded:: next + + .. c:function:: PyInterpreterState* PyInterpreterState_Get(void) Get the current interpreter. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 1d9ac328f32769..b2e2e11c0dc414 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -672,7 +672,7 @@ The :mod:`functools` module defines the following functions: dispatch>` :term:`generic function`. To define a generic method, decorate it with the ``@singledispatchmethod`` - decorator. When defining a function using ``@singledispatchmethod``, note + decorator. When defining a method using ``@singledispatchmethod``, note that the dispatch happens on the type of the first non-*self* or non-*cls* argument:: @@ -716,6 +716,9 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.8 + .. versionchanged:: next + Added support for non-:term:`descriptor` callables. + .. function:: update_wrapper(wrapper, wrapped, assigned=WRAPPER_ASSIGNMENTS, updated=WRAPPER_UPDATES) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 881708a4dd702e..cbbc87b4721a9f 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -656,6 +656,10 @@ Functions .. versionchanged:: 3.13 Added the :meth:`!close` method. + .. versionchanged:: next + A :exc:`ResourceWarning` is now emitted if the iterator opened a file + and is not explicitly closed. + .. function:: parse(source, parser=None) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index d7c9a41eeb2759..31594a2e70bd4c 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -498,6 +498,14 @@ difflib (Contributed by Jiahao Li in :gh:`134580`.) +functools +--------- + +* :func:`~functools.singledispatchmethod` now supports non-:term:`descriptor` + callables. + (Contributed by Serhiy Storchaka in :gh:`140873`.) + + hashlib ------- @@ -1066,6 +1074,12 @@ New features * Add :c:func:`PyTuple_FromArray` to create a :class:`tuple` from an array. (Contributed by Victor Stinner in :gh:`111489`.) +* Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and + :c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set + the stack protection base address and stack protection size of a Python + thread state. + (Contributed by Victor Stinner in :gh:`139653`.) + Changed C APIs -------------- @@ -1230,3 +1244,9 @@ that may require changes to your code. * :meth:`~mmap.mmap.resize` has been removed on platforms that don't support the underlying syscall, instead of raising a :exc:`SystemError`. + +* A :exc:`ResourceWarning` is now emitted for an unclosed + :func:`xml.etree.ElementTree.iterparse` iterator if it opened a file. + Use its :meth:`!close` method or the :func:`contextlib.closing` context + manager to close it. + (Contributed by Osama Abdelkader and Serhiy Storchaka in :gh:`140601`.)
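A minimal sketch of what the `singledispatchmethod` change above enables (the class names here are invented for illustration): a registered implementation that is not a descriptor, i.e. a callable without `__get__`, is now invoked as-is instead of being bound first, so it does not receive `self`:

```python
from functools import singledispatchmethod

class Vector:
    @singledispatchmethod
    def scale(self, factor):
        raise NotImplementedError(f"cannot scale by {factor!r}")

class IntScaler:
    # A plain callable: it defines __call__ but not __get__, so it is
    # not a descriptor and cannot be bound to a Vector instance.
    def __call__(self, factor):
        return f"int-scaled by {factor}"

# Registering a non-descriptor always succeeded; before this change,
# calling it failed with AttributeError because the dispatcher
# unconditionally called __get__ on the dispatched implementation.
Vector.scale.register(int, IntScaler())

print(Vector().scale(3))  # int-scaled by 3
```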
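Similarly, for the new `ResourceWarning` from `xml.etree.ElementTree.iterparse`, a short sketch of the pattern the porting note recommends (the file name is hypothetical):

```python
import contextlib
import xml.etree.ElementTree as ET

# Given a path rather than an open file object, iterparse() opens the
# file itself.  contextlib.closing() calls the iterator's close()
# method on exit, releasing the file and avoiding the ResourceWarning.
with contextlib.closing(ET.iterparse("example.xml")) as events:
    for event, elem in events:
        print(event, elem.tag)  # default events: "end" only
```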
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index dd2ea1202b3795..c53abe43ebe65c 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -276,6 +276,18 @@ PyAPI_FUNC(int) PyGILState_Check(void); */ PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void); +// Set the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(int) PyUnstable_ThreadState_SetStackProtection( + PyThreadState *tstate, + void *stack_start_addr, // Stack start address + size_t stack_size); // Stack size (in bytes) + +// Reset the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStackProtection( + PyThreadState *tstate); + /* Routines for advanced debuggers, requested by David Beazley. Don't use unless you know what you are doing! */ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void); diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index d0a925a3055485..1c94603c08b9b6 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -150,6 +150,8 @@ typedef struct _optimization_stats { uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimizer_attempts; uint64_t optimizer_successes; + uint64_t optimizer_contradiction; + uint64_t optimizer_frame_overflow; uint64_t optimizer_failure_reason_no_memory; uint64_t remove_globals_builtins_changed; uint64_t remove_globals_incorrect_keys; diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 454c8dde031ff4..71066f1bd9f19b 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -95,11 +95,24 @@ backoff_counter_triggers(_Py_BackoffCounter counter) return counter.value_and_backoff < UNREACHABLE_BACKOFF; } +static inline _Py_BackoffCounter +trigger_backoff_counter(void) +{ + _Py_BackoffCounter result; + result.value_and_backoff = 0; + return result; +} + // Initial JUMP_BACKWARD counter. // Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is // invalidated we may construct a new trace before the bytecode has properly // re-specialized: -#define JUMP_BACKWARD_INITIAL_VALUE 4095 +// Note: this should be one less than a prime number. This increases the +// likelihood of finding a "good" loop iteration to trace. +// For example, 4095 does not work for the nqueens benchmark on pyperformance, +// as we always end up tracing the loop's exhaustion iteration, +// which aborts the current tracer. +#define JUMP_BACKWARD_INITIAL_VALUE 4000 #define JUMP_BACKWARD_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter initial_jump_backoff_counter(void) { @@ -112,7 +125,7 @@ initial_jump_backoff_counter(void) * Must be larger than ADAPTIVE_COOLDOWN_VALUE, * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized.
*/ -#define SIDE_EXIT_INITIAL_VALUE 4095 +#define SIDE_EXIT_INITIAL_VALUE 4000 #define SIDE_EXIT_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index fe72a0123ebea8..33b9fd053f70cb 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -392,6 +392,8 @@ _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame #define SPECIAL___AEXIT__ 3 #define SPECIAL_MAX 3 +PyAPI_DATA(const _Py_CODEUNIT *) _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR; + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index e8cbe9d894e1c7..9e4504479cd9f0 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -14,8 +14,6 @@ extern "C" { #include "pycore_structs.h" // PyHamtObject #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_typedefs.h" // _PyRuntimeState -#include "pycore_uop.h" // struct _PyUOpInstruction - #define CODE_MAX_WATCHERS 8 #define CONTEXT_MAX_WATCHERS 8 @@ -934,10 +932,10 @@ struct _is { PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; bool compiling; - struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; + struct _PyExecutorObject *cold_dynamic_executor; int executor_deletion_list_remaining_capacity; size_t executor_creation_counter; _rare_events rare_events; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index bd6b84ec7fd908..548627dc7982ec 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1031,6 +1031,8 @@ enum InstructionFormat { #define HAS_ERROR_NO_POP_FLAG (4096) #define HAS_NO_SAVE_IP_FLAG (8192) #define HAS_PERIODIC_FLAG (16384) +#define HAS_UNPREDICTABLE_JUMP_FLAG (32768) +#define HAS_NEEDS_GUARD_IP_FLAG (65536) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -1046,6 +1048,8 @@ enum InstructionFormat { #define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) #define OPCODE_HAS_NO_SAVE_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NO_SAVE_IP_FLAG)) #define OPCODE_HAS_PERIODIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PERIODIC_FLAG)) +#define OPCODE_HAS_UNPREDICTABLE_JUMP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_UNPREDICTABLE_JUMP_FLAG)) +#define OPCODE_HAS_NEEDS_GUARD_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NEEDS_GUARD_IP_FLAG)) #define OPARG_SIMPLE 0 #define OPARG_CACHE_1 1 @@ -1062,7 +1066,7 @@ enum InstructionFormat { struct opcode_metadata { uint8_t valid_entry; uint8_t instr_format; - uint16_t flags; + uint32_t flags; }; extern const struct opcode_metadata _PyOpcode_opcode_metadata[267]; @@ -1077,7 +1081,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, 
HAS_DEOPT_FLAG }, + [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_LIST_SLICE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, @@ -1094,22 +1098,22 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BUILD_TEMPLATE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, - [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_INTRINSIC_1] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | 
HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1117,8 +1121,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, @@ -1143,7 +1147,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [DELETE_SUBSCR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [END_FOR] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG }, [END_SEND] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1151,11 +1155,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FORMAT_WITH_SPEC] = { true, 
INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, + [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [GET_ANEXT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [GET_AWAITABLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1164,13 +1168,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG }, [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | 
HAS_ESCAPES_FLAG }, + [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1183,8 +1187,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1197,7 +1201,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG }, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, @@ -1205,7 +1209,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | 
HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1253,10 +1257,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [SETUP_ANNOTATIONS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [SET_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1292,7 +1296,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [ANNOTATIONS_PLACEHOLDER] = { true, -1, HAS_PURE_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_IF_FALSE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1406,6 +1410,9 @@ _PyOpcode_macro_expansion[256] = { [IMPORT_FROM] = { .nuops = 1, .uops = { { _IMPORT_FROM, OPARG_SIMPLE, 0 } } }, [IMPORT_NAME] = { .nuops = 1, .uops = { { _IMPORT_NAME, OPARG_SIMPLE, 0 } } }, [IS_OP] = { .nuops = 1, .uops = { { _IS_OP, OPARG_SIMPLE, 0 } } }, + [JUMP_BACKWARD] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } }, + [JUMP_BACKWARD_NO_INTERRUPT] = { .nuops = 1, .uops = { { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 0 } } }, + [JUMP_BACKWARD_NO_JIT] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } }, [LIST_APPEND] = { .nuops = 1, .uops = { { _LIST_APPEND, OPARG_SIMPLE, 0 } } }, [LIST_EXTEND] = { .nuops = 1, .uops = { { _LIST_EXTEND, OPARG_SIMPLE, 0 } } }, [LOAD_ATTR] = { .nuops = 1, .uops = { { _LOAD_ATTR, OPARG_SIMPLE, 8 } } }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8ed5436eb6838c..653285a2c6b79b 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -21,14 +21,6 @@ typedef struct _PyExecutorLinkListNode { } _PyExecutorLinkListNode; -/* Bloom filter with m = 256 - * 
https://en.wikipedia.org/wiki/Bloom_filter */ -#define _Py_BLOOM_FILTER_WORDS 8 - -typedef struct { - uint32_t bits[_Py_BLOOM_FILTER_WORDS]; -} _PyBloomFilter; - typedef struct { uint8_t opcode; uint8_t oparg; @@ -44,7 +36,9 @@ typedef struct { typedef struct _PyExitData { uint32_t target; - uint16_t index; + uint16_t index:14; + uint16_t is_dynamic:1; + uint16_t is_control_flow:1; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; @@ -94,9 +88,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // This value is arbitrary and was not optimized. #define JIT_CLEANUP_THRESHOLD 1000 -#define TRACE_STACK_SIZE 5 - -int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, +int _Py_uop_analyze_and_optimize( + PyFunctionObject *func, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); @@ -130,7 +123,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5) // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) -#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) +#define MAX_ABSTRACT_FRAME_DEPTH (16) // The maximum number of side exits that we can take before requiring forward // progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this @@ -258,6 +251,7 @@ struct _Py_UOpsAbstractFrame { int stack_len; int locals_len; PyFunctionObject *func; + PyCodeObject *code; JitOptRef *stack_pointer; JitOptRef *stack; @@ -333,11 +327,11 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( int curr_stackentries, JitOptRef *args, int arg_len); -extern int _Py_uop_frame_pop(JitOptContext *ctx); +extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); -PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth); +PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, PyThreadState *tstate); static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) { @@ -346,6 +340,7 @@ static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) } extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void); +extern _PyExecutorObject *_PyExecutor_GetColdDynamicExecutor(void); PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit); @@ -354,7 +349,9 @@ static inline int is_terminator(const _PyUOpInstruction *uop) int opcode = uop->opcode; return ( opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP + opcode == _DEOPT || + opcode == _JUMP_TO_TOP || + opcode == _DYNAMIC_EXIT ); } @@ -365,6 +362,18 @@ PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp); #endif +int _PyJit_translate_single_bytecode_to_trace(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *next_instr, bool stop_tracing); + +int +_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, + _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *start_instr, + _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, + int oparg); + +void _PyJit_FinalizeTracing(PyThreadState *tstate); + +void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index f954f1b63ef67c..04a557e1204064 
100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -60,6 +60,12 @@ extern PyObject * _Py_CompileStringObjectWithModule( # define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2) #endif +#ifdef _Py_THREAD_SANITIZER +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 6) +#else +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 3) +#endif + #ifdef __cplusplus } diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 29ebdfd7e01613..50048801b2e4ee 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -12,7 +12,8 @@ extern "C" { #include "pycore_freelist_state.h" // struct _Py_freelists #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr - +#include "pycore_uop.h" // struct _PyUOpInstruction +#include "pycore_structs.h" #ifdef Py_GIL_DISABLED struct _gc_thread_state { @@ -21,6 +22,38 @@ struct _gc_thread_state { }; #endif +#if _Py_TIER2 +typedef struct _PyJitTracerInitialState { + int stack_depth; + int chain_depth; + struct _PyExitData *exit; + PyCodeObject *code; // Strong + PyFunctionObject *func; // Strong + _Py_CODEUNIT *start_instr; + _Py_CODEUNIT *close_loop_instr; + _Py_CODEUNIT *jump_backward_instr; +} _PyJitTracerInitialState; + +typedef struct _PyJitTracerPreviousState { + bool dependencies_still_valid; + bool instr_is_super; + int code_max_size; + int code_curr_size; + int instr_oparg; + int instr_stacklevel; + _Py_CODEUNIT *instr; + PyCodeObject *instr_code; // Strong + struct _PyInterpreterFrame *instr_frame; + _PyBloomFilter dependencies; +} _PyJitTracerPreviousState; + +typedef struct _PyJitTracerState { + _PyUOpInstruction *code_buffer; + _PyJitTracerInitialState initial_state; + _PyJitTracerPreviousState prev_state; +} _PyJitTracerState; +#endif + // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields // are intended to be private. The _PyThreadStateImpl fields not exposed. @@ -37,6 +70,10 @@ typedef struct _PyThreadStateImpl { uintptr_t c_stack_soft_limit; uintptr_t c_stack_hard_limit; + // PyUnstable_ThreadState_ResetStackProtection() values + uintptr_t c_stack_init_base; + uintptr_t c_stack_init_top; + PyObject *asyncio_running_loop; // Strong reference PyObject *asyncio_running_task; // Strong reference @@ -81,7 +118,9 @@ typedef struct _PyThreadStateImpl { #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) Py_ssize_t reftotal; // this thread's total refcount operations #endif - +#if _Py_TIER2 + _PyJitTracerState jit_tracer_state; +#endif } _PyThreadStateImpl; #ifdef __cplusplus diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h index 4abefd3b95d21a..4e1b15af42caa3 100644 --- a/Include/internal/pycore_uop.h +++ b/Include/internal/pycore_uop.h @@ -35,10 +35,18 @@ typedef struct _PyUOpInstruction{ #endif } _PyUOpInstruction; -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 +// This is the length of the trace we translate initially. 
+#define UOP_MAX_TRACE_LENGTH 3000 #define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ +#define _Py_BLOOM_FILTER_WORDS 8 + +typedef struct { + uint32_t bits[_Py_BLOOM_FILTER_WORDS]; +} _PyBloomFilter; + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ff1d75c0cb1938..7a33a5b84fd21a 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -81,101 +81,107 @@ extern "C" { #define _CHECK_STACK_SPACE 357 #define _CHECK_STACK_SPACE_OPERAND 358 #define _CHECK_VALIDITY 359 -#define _COLD_EXIT 360 -#define _COMPARE_OP 361 -#define _COMPARE_OP_FLOAT 362 -#define _COMPARE_OP_INT 363 -#define _COMPARE_OP_STR 364 -#define _CONTAINS_OP 365 -#define _CONTAINS_OP_DICT 366 -#define _CONTAINS_OP_SET 367 +#define _COLD_DYNAMIC_EXIT 360 +#define _COLD_EXIT 361 +#define _COMPARE_OP 362 +#define _COMPARE_OP_FLOAT 363 +#define _COMPARE_OP_INT 364 +#define _COMPARE_OP_STR 365 +#define _CONTAINS_OP 366 +#define _CONTAINS_OP_DICT 367 +#define _CONTAINS_OP_SET 368 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 368 -#define _COPY_1 369 -#define _COPY_2 370 -#define _COPY_3 371 +#define _COPY 369 +#define _COPY_1 370 +#define _COPY_2 371 +#define _COPY_3 372 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 372 +#define _CREATE_INIT_FRAME 373 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 373 +#define _DEOPT 374 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 374 -#define _DO_CALL_FUNCTION_EX 375 -#define _DO_CALL_KW 376 +#define _DO_CALL 375 +#define _DO_CALL_FUNCTION_EX 376 +#define _DO_CALL_KW 377 +#define _DYNAMIC_EXIT 378 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 377 +#define _ERROR_POP_N 379 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 378 -#define _EXPAND_METHOD_KW 379 -#define _FATAL_ERROR 380 +#define _EXPAND_METHOD 380 +#define _EXPAND_METHOD_KW 381 +#define _FATAL_ERROR 382 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 381 -#define _FOR_ITER_GEN_FRAME 382 -#define _FOR_ITER_TIER_TWO 383 +#define _FOR_ITER 383 +#define _FOR_ITER_GEN_FRAME 384 +#define _FOR_ITER_TIER_TWO 385 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 384 -#define _GUARD_CALLABLE_ISINSTANCE 385 -#define _GUARD_CALLABLE_LEN 386 -#define _GUARD_CALLABLE_LIST_APPEND 387 -#define _GUARD_CALLABLE_STR_1 388 -#define _GUARD_CALLABLE_TUPLE_1 389 -#define _GUARD_CALLABLE_TYPE_1 390 -#define _GUARD_DORV_NO_DICT 391 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 392 -#define _GUARD_GLOBALS_VERSION 393 -#define _GUARD_IS_FALSE_POP 394 -#define _GUARD_IS_NONE_POP 395 -#define _GUARD_IS_NOT_NONE_POP 396 -#define _GUARD_IS_TRUE_POP 397 -#define _GUARD_KEYS_VERSION 398 -#define _GUARD_NOS_DICT 399 -#define _GUARD_NOS_FLOAT 400 -#define _GUARD_NOS_INT 401 -#define _GUARD_NOS_LIST 402 -#define _GUARD_NOS_NOT_NULL 403 -#define _GUARD_NOS_NULL 404 -#define _GUARD_NOS_OVERFLOWED 405 -#define _GUARD_NOS_TUPLE 406 -#define _GUARD_NOS_UNICODE 407 
-#define _GUARD_NOT_EXHAUSTED_LIST 408 -#define _GUARD_NOT_EXHAUSTED_RANGE 409 -#define _GUARD_NOT_EXHAUSTED_TUPLE 410 -#define _GUARD_THIRD_NULL 411 -#define _GUARD_TOS_ANY_SET 412 -#define _GUARD_TOS_DICT 413 -#define _GUARD_TOS_FLOAT 414 -#define _GUARD_TOS_INT 415 -#define _GUARD_TOS_LIST 416 -#define _GUARD_TOS_OVERFLOWED 417 -#define _GUARD_TOS_SLICE 418 -#define _GUARD_TOS_TUPLE 419 -#define _GUARD_TOS_UNICODE 420 -#define _GUARD_TYPE_VERSION 421 -#define _GUARD_TYPE_VERSION_AND_LOCK 422 -#define _HANDLE_PENDING_AND_DEOPT 423 +#define _GUARD_BINARY_OP_EXTEND 386 +#define _GUARD_CALLABLE_ISINSTANCE 387 +#define _GUARD_CALLABLE_LEN 388 +#define _GUARD_CALLABLE_LIST_APPEND 389 +#define _GUARD_CALLABLE_STR_1 390 +#define _GUARD_CALLABLE_TUPLE_1 391 +#define _GUARD_CALLABLE_TYPE_1 392 +#define _GUARD_DORV_NO_DICT 393 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 394 +#define _GUARD_GLOBALS_VERSION 395 +#define _GUARD_IP_RETURN_GENERATOR 396 +#define _GUARD_IP_RETURN_VALUE 397 +#define _GUARD_IP_YIELD_VALUE 398 +#define _GUARD_IP__PUSH_FRAME 399 +#define _GUARD_IS_FALSE_POP 400 +#define _GUARD_IS_NONE_POP 401 +#define _GUARD_IS_NOT_NONE_POP 402 +#define _GUARD_IS_TRUE_POP 403 +#define _GUARD_KEYS_VERSION 404 +#define _GUARD_NOS_DICT 405 +#define _GUARD_NOS_FLOAT 406 +#define _GUARD_NOS_INT 407 +#define _GUARD_NOS_LIST 408 +#define _GUARD_NOS_NOT_NULL 409 +#define _GUARD_NOS_NULL 410 +#define _GUARD_NOS_OVERFLOWED 411 +#define _GUARD_NOS_TUPLE 412 +#define _GUARD_NOS_UNICODE 413 +#define _GUARD_NOT_EXHAUSTED_LIST 414 +#define _GUARD_NOT_EXHAUSTED_RANGE 415 +#define _GUARD_NOT_EXHAUSTED_TUPLE 416 +#define _GUARD_THIRD_NULL 417 +#define _GUARD_TOS_ANY_SET 418 +#define _GUARD_TOS_DICT 419 +#define _GUARD_TOS_FLOAT 420 +#define _GUARD_TOS_INT 421 +#define _GUARD_TOS_LIST 422 +#define _GUARD_TOS_OVERFLOWED 423 +#define _GUARD_TOS_SLICE 424 +#define _GUARD_TOS_TUPLE 425 +#define _GUARD_TOS_UNICODE 426 +#define _GUARD_TYPE_VERSION 427 +#define _GUARD_TYPE_VERSION_AND_LOCK 428 +#define _HANDLE_PENDING_AND_DEOPT 429 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 424 -#define _INIT_CALL_PY_EXACT_ARGS 425 -#define _INIT_CALL_PY_EXACT_ARGS_0 426 -#define _INIT_CALL_PY_EXACT_ARGS_1 427 -#define _INIT_CALL_PY_EXACT_ARGS_2 428 -#define _INIT_CALL_PY_EXACT_ARGS_3 429 -#define _INIT_CALL_PY_EXACT_ARGS_4 430 -#define _INSERT_NULL 431 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 430 +#define _INIT_CALL_PY_EXACT_ARGS 431 +#define _INIT_CALL_PY_EXACT_ARGS_0 432 +#define _INIT_CALL_PY_EXACT_ARGS_1 433 +#define _INIT_CALL_PY_EXACT_ARGS_2 434 +#define _INIT_CALL_PY_EXACT_ARGS_3 435 +#define _INIT_CALL_PY_EXACT_ARGS_4 436 +#define _INSERT_NULL 437 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -185,177 +191,178 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 432 +#define _IS_NONE 438 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 433 -#define _ITER_CHECK_RANGE 434 -#define _ITER_CHECK_TUPLE 435 -#define _ITER_JUMP_LIST 436 -#define _ITER_JUMP_RANGE 437 -#define _ITER_JUMP_TUPLE 438 -#define _ITER_NEXT_LIST 439 -#define _ITER_NEXT_LIST_TIER_TWO 440 -#define _ITER_NEXT_RANGE 441 -#define _ITER_NEXT_TUPLE 442 -#define _JUMP_TO_TOP 
443 +#define _ITER_CHECK_LIST 439 +#define _ITER_CHECK_RANGE 440 +#define _ITER_CHECK_TUPLE 441 +#define _ITER_JUMP_LIST 442 +#define _ITER_JUMP_RANGE 443 +#define _ITER_JUMP_TUPLE 444 +#define _ITER_NEXT_LIST 445 +#define _ITER_NEXT_LIST_TIER_TWO 446 +#define _ITER_NEXT_RANGE 447 +#define _ITER_NEXT_TUPLE 448 +#define _JUMP_BACKWARD_NO_INTERRUPT JUMP_BACKWARD_NO_INTERRUPT +#define _JUMP_TO_TOP 449 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 444 -#define _LOAD_ATTR_CLASS 445 +#define _LOAD_ATTR 450 +#define _LOAD_ATTR_CLASS 451 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 446 -#define _LOAD_ATTR_METHOD_LAZY_DICT 447 -#define _LOAD_ATTR_METHOD_NO_DICT 448 -#define _LOAD_ATTR_METHOD_WITH_VALUES 449 -#define _LOAD_ATTR_MODULE 450 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 451 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 452 -#define _LOAD_ATTR_PROPERTY_FRAME 453 -#define _LOAD_ATTR_SLOT 454 -#define _LOAD_ATTR_WITH_HINT 455 +#define _LOAD_ATTR_INSTANCE_VALUE 452 +#define _LOAD_ATTR_METHOD_LAZY_DICT 453 +#define _LOAD_ATTR_METHOD_NO_DICT 454 +#define _LOAD_ATTR_METHOD_WITH_VALUES 455 +#define _LOAD_ATTR_MODULE 456 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 457 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 458 +#define _LOAD_ATTR_PROPERTY_FRAME 459 +#define _LOAD_ATTR_SLOT 460 +#define _LOAD_ATTR_WITH_HINT 461 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 456 +#define _LOAD_BYTECODE 462 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 457 -#define _LOAD_CONST_INLINE_BORROW 458 -#define _LOAD_CONST_UNDER_INLINE 459 -#define _LOAD_CONST_UNDER_INLINE_BORROW 460 +#define _LOAD_CONST_INLINE 463 +#define _LOAD_CONST_INLINE_BORROW 464 +#define _LOAD_CONST_UNDER_INLINE 465 +#define _LOAD_CONST_UNDER_INLINE_BORROW 466 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 461 -#define _LOAD_FAST_0 462 -#define _LOAD_FAST_1 463 -#define _LOAD_FAST_2 464 -#define _LOAD_FAST_3 465 -#define _LOAD_FAST_4 466 -#define _LOAD_FAST_5 467 -#define _LOAD_FAST_6 468 -#define _LOAD_FAST_7 469 +#define _LOAD_FAST 467 +#define _LOAD_FAST_0 468 +#define _LOAD_FAST_1 469 +#define _LOAD_FAST_2 470 +#define _LOAD_FAST_3 471 +#define _LOAD_FAST_4 472 +#define _LOAD_FAST_5 473 +#define _LOAD_FAST_6 474 +#define _LOAD_FAST_7 475 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 470 -#define _LOAD_FAST_BORROW_0 471 -#define _LOAD_FAST_BORROW_1 472 -#define _LOAD_FAST_BORROW_2 473 -#define _LOAD_FAST_BORROW_3 474 -#define _LOAD_FAST_BORROW_4 475 -#define _LOAD_FAST_BORROW_5 476 -#define _LOAD_FAST_BORROW_6 477 -#define _LOAD_FAST_BORROW_7 478 +#define _LOAD_FAST_BORROW 476 +#define _LOAD_FAST_BORROW_0 477 +#define _LOAD_FAST_BORROW_1 478 +#define _LOAD_FAST_BORROW_2 479 +#define _LOAD_FAST_BORROW_3 480 +#define _LOAD_FAST_BORROW_4 481 +#define _LOAD_FAST_BORROW_5 482 +#define _LOAD_FAST_BORROW_6 483 +#define _LOAD_FAST_BORROW_7 484 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 479 -#define _LOAD_GLOBAL_BUILTINS 480 -#define _LOAD_GLOBAL_MODULE 481 +#define _LOAD_GLOBAL 485 +#define _LOAD_GLOBAL_BUILTINS 486 +#define _LOAD_GLOBAL_MODULE 487 #define _LOAD_LOCALS 
LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 482 -#define _LOAD_SMALL_INT_0 483 -#define _LOAD_SMALL_INT_1 484 -#define _LOAD_SMALL_INT_2 485 -#define _LOAD_SMALL_INT_3 486 -#define _LOAD_SPECIAL 487 +#define _LOAD_SMALL_INT 488 +#define _LOAD_SMALL_INT_0 489 +#define _LOAD_SMALL_INT_1 490 +#define _LOAD_SMALL_INT_2 491 +#define _LOAD_SMALL_INT_3 492 +#define _LOAD_SPECIAL 493 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 488 +#define _MAKE_CALLARGS_A_TUPLE 494 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 489 +#define _MAKE_WARM 495 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 490 -#define _MAYBE_EXPAND_METHOD_KW 491 -#define _MONITOR_CALL 492 -#define _MONITOR_CALL_KW 493 -#define _MONITOR_JUMP_BACKWARD 494 -#define _MONITOR_RESUME 495 +#define _MAYBE_EXPAND_METHOD 496 +#define _MAYBE_EXPAND_METHOD_KW 497 +#define _MONITOR_CALL 498 +#define _MONITOR_CALL_KW 499 +#define _MONITOR_JUMP_BACKWARD 500 +#define _MONITOR_RESUME 501 #define _NOP NOP -#define _POP_CALL 496 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 497 -#define _POP_CALL_ONE 498 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 499 -#define _POP_CALL_TWO 500 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 501 +#define _POP_CALL 502 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 503 +#define _POP_CALL_ONE 504 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 505 +#define _POP_CALL_TWO 506 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 507 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 502 -#define _POP_JUMP_IF_TRUE 503 +#define _POP_JUMP_IF_FALSE 508 +#define _POP_JUMP_IF_TRUE 509 #define _POP_TOP POP_TOP -#define _POP_TOP_FLOAT 504 -#define _POP_TOP_INT 505 -#define _POP_TOP_LOAD_CONST_INLINE 506 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 507 -#define _POP_TOP_NOP 508 -#define _POP_TOP_UNICODE 509 -#define _POP_TWO 510 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 511 +#define _POP_TOP_FLOAT 510 +#define _POP_TOP_INT 511 +#define _POP_TOP_LOAD_CONST_INLINE 512 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 513 +#define _POP_TOP_NOP 514 +#define _POP_TOP_UNICODE 515 +#define _POP_TWO 516 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 517 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 512 +#define _PUSH_FRAME 518 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 513 -#define _PY_FRAME_GENERAL 514 -#define _PY_FRAME_KW 515 -#define _QUICKEN_RESUME 516 -#define _REPLACE_WITH_TRUE 517 +#define _PUSH_NULL_CONDITIONAL 519 +#define _PY_FRAME_GENERAL 520 +#define _PY_FRAME_KW 521 +#define _QUICKEN_RESUME 522 +#define _REPLACE_WITH_TRUE 523 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 518 -#define _SEND 519 -#define _SEND_GEN_FRAME 520 +#define _SAVE_RETURN_OFFSET 524 +#define _SEND 525 +#define _SEND_GEN_FRAME 526 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 521 -#define _STORE_ATTR 522 -#define _STORE_ATTR_INSTANCE_VALUE 523 -#define _STORE_ATTR_SLOT 524 -#define _STORE_ATTR_WITH_HINT 525 +#define _START_EXECUTOR 527 +#define _STORE_ATTR 528 +#define 
_STORE_ATTR_INSTANCE_VALUE 529 +#define _STORE_ATTR_SLOT 530 +#define _STORE_ATTR_WITH_HINT 531 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 526 -#define _STORE_FAST_0 527 -#define _STORE_FAST_1 528 -#define _STORE_FAST_2 529 -#define _STORE_FAST_3 530 -#define _STORE_FAST_4 531 -#define _STORE_FAST_5 532 -#define _STORE_FAST_6 533 -#define _STORE_FAST_7 534 +#define _STORE_FAST 532 +#define _STORE_FAST_0 533 +#define _STORE_FAST_1 534 +#define _STORE_FAST_2 535 +#define _STORE_FAST_3 536 +#define _STORE_FAST_4 537 +#define _STORE_FAST_5 538 +#define _STORE_FAST_6 539 +#define _STORE_FAST_7 540 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 535 -#define _STORE_SUBSCR 536 -#define _STORE_SUBSCR_DICT 537 -#define _STORE_SUBSCR_LIST_INT 538 -#define _SWAP 539 -#define _SWAP_2 540 -#define _SWAP_3 541 -#define _TIER2_RESUME_CHECK 542 -#define _TO_BOOL 543 +#define _STORE_SLICE 541 +#define _STORE_SUBSCR 542 +#define _STORE_SUBSCR_DICT 543 +#define _STORE_SUBSCR_LIST_INT 544 +#define _SWAP 545 +#define _SWAP_2 546 +#define _SWAP_3 547 +#define _TIER2_RESUME_CHECK 548 +#define _TO_BOOL 549 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 544 +#define _TO_BOOL_LIST 550 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 545 +#define _TO_BOOL_STR 551 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 546 -#define _UNPACK_SEQUENCE_LIST 547 -#define _UNPACK_SEQUENCE_TUPLE 548 -#define _UNPACK_SEQUENCE_TWO_TUPLE 549 +#define _UNPACK_SEQUENCE 552 +#define _UNPACK_SEQUENCE_LIST 553 +#define _UNPACK_SEQUENCE_TUPLE 554 +#define _UNPACK_SEQUENCE_TWO_TUPLE 555 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 549 +#define MAX_UOP_ID 555 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1248771996943b..d5a3c362d875e6 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -11,7 +11,7 @@ extern "C" { #include #include "pycore_uop_ids.h" -extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1]; +extern const uint32_t _PyUop_Flags[MAX_UOP_ID+1]; typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange; extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1]; extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; @@ -19,7 +19,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA -const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { +const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_NOP] = HAS_PURE_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -128,12 +128,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_RETURN_VALUE] = HAS_ESCAPES_FLAG, + [_RETURN_VALUE] = HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_ANEXT] = HAS_ERROR_FLAG | 
HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SEND_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_YIELD_VALUE] = HAS_ARG_FLAG, + [_YIELD_VALUE] = HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_POP_EXCEPT] = HAS_ESCAPES_FLAG, [_LOAD_COMMON_CONSTANT] = HAS_ARG_FLAG, [_LOAD_BUILD_CLASS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -256,7 +256,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = 0, + [_PUSH_FRAME] = HAS_NEEDS_GUARD_IP_FLAG, [_GUARD_NOS_NULL] = HAS_DEOPT_FLAG, [_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG, [_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG, @@ -293,7 +293,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_MAKE_CALLARGS_A_TUPLE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG, - [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -315,6 +315,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, + [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -336,7 +337,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_HANDLE_PENDING_AND_DEOPT] = HAS_ESCAPES_FLAG, [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG, - [_COLD_EXIT] = HAS_ESCAPES_FLAG, + [_COLD_EXIT] = 0, + [_COLD_DYNAMIC_EXIT] = 0, + [_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG, + [_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG, + [_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG, + [_GUARD_IP_RETURN_GENERATOR] = HAS_EXIT_FLAG, }; const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { @@ -419,6 +425,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_COLD_DYNAMIC_EXIT] = "_COLD_DYNAMIC_EXIT", [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", @@ -443,6 +450,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_DEOPT] = "_DEOPT", [_DICT_MERGE] = "_DICT_MERGE", [_DICT_UPDATE] = "_DICT_UPDATE", + [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT", [_END_FOR] = "_END_FOR", [_END_SEND] = "_END_SEND", [_ERROR_POP_N] = "_ERROR_POP_N", @@ -471,6 +479,10 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", + [_GUARD_IP_RETURN_GENERATOR] = "_GUARD_IP_RETURN_GENERATOR", + [_GUARD_IP_RETURN_VALUE] = "_GUARD_IP_RETURN_VALUE", + [_GUARD_IP_YIELD_VALUE] = "_GUARD_IP_YIELD_VALUE", + [_GUARD_IP__PUSH_FRAME] = "_GUARD_IP__PUSH_FRAME", [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", @@ -1261,6 +1273,8 @@ int 
_PyUop_num_popped(int opcode, int oparg) return 0; case _EXIT_TRACE: return 0; + case _DYNAMIC_EXIT: + return 0; case _CHECK_VALIDITY: return 0; case _LOAD_CONST_INLINE: @@ -1305,6 +1319,16 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _COLD_EXIT: return 0; + case _COLD_DYNAMIC_EXIT: + return 0; + case _GUARD_IP__PUSH_FRAME: + return 0; + case _GUARD_IP_YIELD_VALUE: + return 0; + case _GUARD_IP_RETURN_VALUE: + return 0; + case _GUARD_IP_RETURN_GENERATOR: + return 0; default: return -1; } diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 2166dbff0ee70c..33907b1fc2a53a 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -150,33 +150,42 @@ def evaluate( if globals is None: globals = {} + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + if locals is None: locals = {} if isinstance(owner, type): locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) - if type_params is None and owner is not None: - # "Inject" type parameters into the local namespace - # (unless they are shadowed by assignments *in* the local namespace), - # as a way of emulating annotation scopes when calling `eval()` - type_params = getattr(owner, "__type_params__", None) - - # Type parameters exist in their own scope, which is logically - # between the locals and the globals. We simulate this by adding - # them to the globals. Similar reasoning applies to nonlocals stored in cells. - if type_params is not None or isinstance(self.__cell__, dict): - globals = dict(globals) + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` if type_params is not None: for param in type_params: - globals[param.__name__] = param + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. 
if isinstance(self.__cell__, dict): - for cell_name, cell_value in self.__cell__.items(): + for cell_name, cell in self.__cell__.items(): try: - globals[cell_name] = cell_value.cell_contents + cell_value = cell.cell_contents except ValueError: pass + else: + locals.setdefault(cell_name, cell_value) + if self.__extra_names__: - locals = {**locals, **self.__extra_names__} + locals.update(self.__extra_names__) arg = self.__forward_arg__ if arg.isidentifier() and not keyword.iskeyword(arg): diff --git a/Lib/functools.py b/Lib/functools.py index a92844ba7227b0..8063eb5ffc3304 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -1083,7 +1083,10 @@ def __call__(self, /, *args, **kwargs): 'singledispatchmethod method') raise TypeError(f'{funcname} requires at least ' '1 positional argument') - return self._dispatch(args[0].__class__).__get__(self._obj, self._cls)(*args, **kwargs) + method = self._dispatch(args[0].__class__) + if hasattr(method, "__get__"): + method = method.__get__(self._obj, self._cls) + return method(*args, **kwargs) def __getattr__(self, name): # Resolve these attributes lazily to speed up creation of diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index 9f3275d5071484..8208d0e9c94819 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -2149,6 +2149,51 @@ def test_fwdref_invalid_syntax(self): with self.assertRaises(SyntaxError): fr.evaluate() + def test_re_evaluate_generics(self): + global global_alias + + # If we've already run this test before, + # ensure the variable is still undefined + if "global_alias" in globals(): + del global_alias + + class C: + x: global_alias[int] + + # Evaluate the ForwardRef once + evaluated = get_annotations(C, format=Format.FORWARDREF)["x"].evaluate( + format=Format.FORWARDREF + ) + + # Now define the global and ensure that the ForwardRef evaluates + global_alias = list + self.assertEqual(evaluated.evaluate(), list[int]) + + def test_fwdref_evaluate_argument_mutation(self): + class C[T]: + nonlocal alias + x: alias[T] + + # Mutable arguments + globals_ = globals() + globals_copy = globals_.copy() + locals_ = locals() + locals_copy = locals_.copy() + + # Evaluate the ForwardRef, ensuring we use __cell__ and type params + get_annotations(C, format=Format.FORWARDREF)["x"].evaluate( + globals=globals_, + locals=locals_, + type_params=C.__type_params__, + format=Format.FORWARDREF, + ) + + # Check if the passed in mutable arguments equal the originals + self.assertEqual(globals_, globals_copy) + self.assertEqual(locals_, locals_copy) + + alias = list + def test_fwdref_final_class(self): with self.assertRaises(TypeError): class C(ForwardRef): diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index fb4a441ca64772..608ffdfad1209a 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -3057,8 +3057,8 @@ def test_source_segment_missing_info(self): class NodeTransformerTests(ASTTestMixin, unittest.TestCase): def assertASTTransformation(self, transformer_class, - initial_code, expected_code): - initial_ast = ast.parse(dedent(initial_code)) + code, expected_code): + initial_ast = ast.parse(dedent(code)) expected_ast = ast.parse(dedent(expected_code)) transformer = transformer_class() diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e65556fb28f92d..f06c6cbda2976c 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -422,32 +422,6 @@ def testfunc(n, m): uops = get_opnames(ex) 
self.assertIn("_FOR_ITER_TIER_TWO", uops) - def test_confidence_score(self): - def testfunc(n): - bits = 0 - for i in range(n): - if i & 0x01: - bits += 1 - if i & 0x02: - bits += 1 - if i&0x04: - bits += 1 - if i&0x08: - bits += 1 - if i&0x10: - bits += 1 - return bits - - x = testfunc(TIER2_THRESHOLD * 2) - - self.assertEqual(x, TIER2_THRESHOLD * 5) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - ops = list(iter_opnames(ex)) - #Since branch is 50/50 the trace could go either way. - count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP") - self.assertLessEqual(count, 2) - @requires_specialization @unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @@ -847,38 +821,7 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_unicode_count), 1) self.assertIn("_COMPARE_OP_STR", uops) - def test_type_inconsistency(self): - ns = {} - src = textwrap.dedent(""" - def testfunc(n): - for i in range(n): - x = _test_global + _test_global - """) - exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - ns['_test_global'] = 1 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNotNone(ex) - uops = get_opnames(ex) - self.assertNotIn("_GUARD_TOS_INT", uops) - self.assertNotIn("_GUARD_NOS_INT", uops) - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) - # Try again, but between the runs, set the global to a float. - # This should result in no executor the second time. - ns = {} - exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - ns['_test_global'] = 3.14 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_sequential(self): def dummy12(x): return x - 1 @@ -907,6 +850,7 @@ def testfunc(n): largest_stack = _testinternalcapi.get_co_framesize(dummy13.__code__) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_nested(self): def dummy12(x): return x + 3 @@ -937,6 +881,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_several_calls(self): def dummy12(x): return x + 3 @@ -972,6 +917,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_several_calls_different_order(self): # same as `several_calls` but with top-level calls reversed def dummy12(x): @@ -1008,6 +954,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_complex(self): def dummy0(x): return x @@ -1057,6 +1004,7 @@ def testfunc(n): ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands ) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_large_framesize(self): # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is # actually doing its job. 
Note that the resulting trace hits @@ -1118,6 +1066,7 @@ def testfunc(n): ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands ) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_recursion(self): def dummy15(x): while x > 0: diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index ce9e7f6d57dd3c..090926fd8d8b61 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -2785,7 +2785,7 @@ class Slot: @functools.singledispatchmethod @classmethod def go(cls, item, arg): - pass + return item - arg @go.register @classmethod @@ -2794,7 +2794,9 @@ def _(cls, item: int, arg): s = Slot() self.assertEqual(s.go(1, 1), 2) + self.assertEqual(s.go(1.5, 1), 0.5) self.assertEqual(Slot.go(1, 1), 2) + self.assertEqual(Slot.go(1.5, 1), 0.5) def test_staticmethod_slotted_class(self): class A: @@ -3485,6 +3487,37 @@ def _(item, arg: bytes) -> str: self.assertEqual(str(Signature.from_callable(A.static_func)), '(item, arg: int) -> str') + def test_method_non_descriptor(self): + class Callable: + def __init__(self, value): + self.value = value + def __call__(self, arg): + return self.value, arg + + class A: + t = functools.singledispatchmethod(Callable('general')) + t.register(int, Callable('special')) + + @functools.singledispatchmethod + def u(self, arg): + return 'general', arg + u.register(int, Callable('special')) + + v = functools.singledispatchmethod(Callable('general')) + @v.register(int) + def _(self, arg): + return 'special', arg + + a = A() + self.assertEqual(a.t(0), ('special', 0)) + self.assertEqual(a.t(2.5), ('general', 2.5)) + self.assertEqual(A.t(0), ('special', 0)) + self.assertEqual(A.t(2.5), ('general', 2.5)) + self.assertEqual(a.u(0), ('special', 0)) + self.assertEqual(a.u(2.5), ('general', 2.5)) + self.assertEqual(a.v(0), ('special', 0)) + self.assertEqual(a.v(2.5), ('general', 2.5)) + class CachedCostItem: _cost = 1 diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9d3248d972e8d1..798f58737b1bf6 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -2253,9 +2253,10 @@ def frame_2_jit(expected: bool) -> None: def frame_3_jit() -> None: # JITs just before the last loop: - for i in range(_testinternalcapi.TIER2_THRESHOLD + 1): + # 1 extra iteration for tracing. + for i in range(_testinternalcapi.TIER2_THRESHOLD + 2): # Careful, doing this in the reverse order breaks tracing: - expected = {enabled} and i == _testinternalcapi.TIER2_THRESHOLD + expected = {enabled} and i >= _testinternalcapi.TIER2_THRESHOLD + 1 assert sys._jit.is_active() is expected frame_2_jit(expected) assert sys._jit.is_active() is expected diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 25c084c8b9c9eb..87811199706a1f 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1436,18 +1436,40 @@ def test_nonexistent_file(self): def test_resource_warnings_not_exhausted(self): # Not exhausting the iterator still closes the underlying file (bpo-43292) + # Not closing before del should emit ResourceWarning it = ET.iterparse(SIMPLE_XMLFILE) with warnings_helper.check_no_resource_warning(self): + it.close() + del it + gc_collect() + + it = ET.iterparse(SIMPLE_XMLFILE) + with self.assertWarns(ResourceWarning) as wm: del it gc_collect() + # Not 'unclosed file'. 
+ self.assertIn('unclosed iterparse iterator', str(wm.warning)) + self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning)) + self.assertEqual(wm.filename, __file__) it = ET.iterparse(SIMPLE_XMLFILE) with warnings_helper.check_no_resource_warning(self): action, elem = next(it) + it.close() self.assertEqual((action, elem.tag), ('end', 'element')) del it, elem gc_collect() + it = ET.iterparse(SIMPLE_XMLFILE) + with self.assertWarns(ResourceWarning) as wm: + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'element')) + del it, elem + gc_collect() + self.assertIn('unclosed iterparse iterator', str(wm.warning)) + self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning)) + self.assertEqual(wm.filename, __file__) + def test_resource_warnings_failed_iteration(self): self.addCleanup(os_helper.unlink, TESTFN) with open(TESTFN, "wb") as f: @@ -1461,15 +1483,40 @@ def test_resource_warnings_failed_iteration(self): next(it) self.assertEqual(str(cm.exception), 'junk after document element: line 1, column 12') + it.close() del cm, it gc_collect() + it = ET.iterparse(TESTFN) + action, elem = next(it) + self.assertEqual((action, elem.tag), ('end', 'document')) + with self.assertWarns(ResourceWarning) as wm: + with self.assertRaises(ET.ParseError) as cm: + next(it) + self.assertEqual(str(cm.exception), + 'junk after document element: line 1, column 12') + del cm, it + gc_collect() + self.assertIn('unclosed iterparse iterator', str(wm.warning)) + self.assertIn(repr(TESTFN), str(wm.warning)) + self.assertEqual(wm.filename, __file__) + def test_resource_warnings_exhausted(self): it = ET.iterparse(SIMPLE_XMLFILE) with warnings_helper.check_no_resource_warning(self): + list(it) + it.close() + del it + gc_collect() + + it = ET.iterparse(SIMPLE_XMLFILE) + with self.assertWarns(ResourceWarning) as wm: list(it) del it gc_collect() + self.assertIn('unclosed iterparse iterator', str(wm.warning)) + self.assertIn(repr(SIMPLE_XMLFILE), str(wm.warning)) + self.assertEqual(wm.filename, __file__) def test_close_not_exhausted(self): iterparse = ET.iterparse diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index dafe5b1b8a0c3f..d8c0b1b621684b 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1261,16 +1261,20 @@ def iterator(source): gen = iterator(source) class IterParseIterator(collections.abc.Iterator): __next__ = gen.__next__ + def close(self): + nonlocal close_source if close_source: source.close() + close_source = False gen.close() - def __del__(self): - # TODO: Emit a ResourceWarning if it was not explicitly closed. - # (When the close() method will be supported in all maintained Python versions.) + def __del__(self, _warn=warnings.warn): if close_source: - source.close() + try: + _warn(f"unclosed iterparse iterator {source.name!r}", ResourceWarning, stacklevel=2) + finally: + source.close() it = IterParseIterator() it.root = None diff --git a/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst b/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst new file mode 100644 index 00000000000000..cd3d5262fa0f3a --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and +:c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set the +stack protection base address and stack protection size of a Python thread +state. Patch by Victor Stinner. 
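Returning to the ``iterparse`` change above: the intended calling pattern is to close the iterator explicitly (or wrap it in ``contextlib.closing()``) whenever iteration may stop early. A minimal usage sketch, not part of the patch; ``data.xml`` is a placeholder path::

    import contextlib
    import xml.etree.ElementTree as ET

    # Explicit close: the file opened by iterparse() is released and no
    # ResourceWarning is emitted, even if iteration stops early.
    events = ET.iterparse("data.xml")  # placeholder file
    try:
        for event, elem in events:
            pass
    finally:
        events.close()

    # Equivalent form: the iterator only grows a close() method, not a
    # context-manager protocol, so contextlib.closing() is the natural wrapper.
    with contextlib.closing(ET.iterparse("data.xml")) as events:
        for event, elem in events:
            pass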
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-18-21-50-44.gh-issue-139109.9QQOzN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-18-21-50-44.gh-issue-139109.9QQOzN.rst new file mode 100644 index 00000000000000..40b9d19ee42968 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-18-21-50-44.gh-issue-139109.9QQOzN.rst @@ -0,0 +1 @@ +A new tracing frontend for the JIT compiler has been implemented. Patch by Ken Jin. Design for CPython by Ken Jin, Mark Shannon and Brandt Bucher. diff --git a/Misc/NEWS.d/next/Library/2025-10-25-22-55-07.gh-issue-140601.In3MlS.rst b/Misc/NEWS.d/next/Library/2025-10-25-22-55-07.gh-issue-140601.In3MlS.rst new file mode 100644 index 00000000000000..72666bb8224d63 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-25-22-55-07.gh-issue-140601.In3MlS.rst @@ -0,0 +1,4 @@ +:func:`xml.etree.ElementTree.iterparse` now emits a :exc:`ResourceWarning` +when the iterator is not explicitly closed and was opened with a filename. +This helps developers identify and fix resource leaks. Patch by Osama +Abdelkader. diff --git a/Misc/NEWS.d/next/Library/2025-11-01-14-44-09.gh-issue-140873.kfuc9B.rst b/Misc/NEWS.d/next/Library/2025-11-01-14-44-09.gh-issue-140873.kfuc9B.rst new file mode 100644 index 00000000000000..e15057640646d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-01-14-44-09.gh-issue-140873.kfuc9B.rst @@ -0,0 +1,2 @@ +Add support of non-:term:`descriptor` callables in +:func:`functools.singledispatchmethod`. diff --git a/Misc/NEWS.d/next/Library/2025-11-04-15-40-35.gh-issue-137969.9VZQVt.rst b/Misc/NEWS.d/next/Library/2025-11-04-15-40-35.gh-issue-137969.9VZQVt.rst new file mode 100644 index 00000000000000..dfa582bdbc8825 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-04-15-40-35.gh-issue-137969.9VZQVt.rst @@ -0,0 +1,3 @@ +Fix :meth:`annotationlib.ForwardRef.evaluate` returning +:class:`~annotationlib.ForwardRef` objects which don't update with new +globals. 
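To illustrate the ``ForwardRef.evaluate`` fix above, here is a sketch distilled from ``test_re_evaluate_generics`` in the test changes (``my_alias`` is a placeholder name; assumes the deferred-annotation semantics of Python 3.14+)::

    from annotationlib import Format, get_annotations

    class C:
        x: my_alias[int]   # my_alias is not defined yet

    # With Format.FORWARDREF the unresolved name comes back as a ForwardRef,
    # and evaluate() in FORWARDREF format yields another, still unresolved, one.
    ref = get_annotations(C, format=Format.FORWARDREF)["x"]
    partial = ref.evaluate(format=Format.FORWARDREF)

    # After the fix, that ForwardRef picks up globals defined later:
    my_alias = list
    assert partial.evaluate() == list[int]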
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index dede05960d78b6..89e558b0fe8933 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2446,6 +2446,58 @@ module_get_gc_hooks(PyObject *self, PyObject *arg) return result; } + +static void +check_threadstate_set_stack_protection(PyThreadState *tstate, + void *start, size_t size) +{ + assert(PyUnstable_ThreadState_SetStackProtection(tstate, start, size) == 0); + assert(!PyErr_Occurred()); + + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(ts->c_stack_top == (uintptr_t)start + size); + assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit); + assert(ts->c_stack_soft_limit < ts->c_stack_top); +} + + +static PyObject * +test_threadstate_set_stack_protection(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyThreadState *tstate = PyThreadState_GET(); + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(!PyErr_Occurred()); + + uintptr_t init_base = ts->c_stack_init_base; + size_t init_top = ts->c_stack_init_top; + + // Test the minimum stack size + size_t size = _PyOS_MIN_STACK_SIZE; + void *start = (void*)(_Py_get_machine_stack_pointer() - size); + check_threadstate_set_stack_protection(tstate, start, size); + + // Test a larger size + size = 7654321; + assert(size > _PyOS_MIN_STACK_SIZE); + start = (void*)(_Py_get_machine_stack_pointer() - size); + check_threadstate_set_stack_protection(tstate, start, size); + + // Test invalid size (too small) + size = 5; + start = (void*)(_Py_get_machine_stack_pointer() - size); + assert(PyUnstable_ThreadState_SetStackProtection(tstate, start, size) == -1); + assert(PyErr_ExceptionMatches(PyExc_ValueError)); + PyErr_Clear(); + + // Test PyUnstable_ThreadState_ResetStackProtection() + PyUnstable_ThreadState_ResetStackProtection(tstate); + assert(ts->c_stack_init_base == init_base); + assert(ts->c_stack_init_top == init_top); + + Py_RETURN_NONE; +} + + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2556,6 +2608,8 @@ static PyMethodDef module_functions[] = { {"simple_pending_call", simple_pending_call, METH_O}, {"set_vectorcall_nop", set_vectorcall_nop, METH_O}, {"module_get_gc_hooks", module_get_gc_hooks, METH_O}, + {"test_threadstate_set_stack_protection", + test_threadstate_set_stack_protection, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; @@ -2607,7 +2661,8 @@ module_exec(PyObject *module) } if (PyModule_Add(module, "TIER2_THRESHOLD", - PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE + 1)) < 0) { + // + 1 more due to one loop spent on tracing. 
+ PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE + 2)) < 0) { return 1; } diff --git a/Objects/codeobject.c b/Objects/codeobject.c index fc3f5d9dde0bc1..3aea2038fd17e7 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2432,6 +2432,7 @@ code_dealloc(PyObject *self) PyMem_Free(co_extra); } #ifdef _Py_TIER2 + _PyJit_Tracer_InvalidateDependency(tstate, self); if (co->co_executors != NULL) { clear_executors(co); } diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 0cae3703d1d0c6..b652973600c17d 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -17,6 +17,7 @@ #include "frameobject.h" // PyFrameLocalsProxyObject #include "opcode.h" // EXTENDED_ARG +#include "pycore_optimizer.h" #include "clinic/frameobject.c.h" @@ -260,7 +261,10 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) return -1; } - _Py_Executors_InvalidateDependency(PyInterpreterState_Get(), co, 1); +#if _Py_TIER2 + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1); + _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co); +#endif _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); _PyStackRef oldvalue = fast[i]; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 43198aaf8a7048..b659ac8023373b 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -11,7 +11,7 @@ #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stats.h" #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() - +#include "pycore_optimizer.h" // _PyJit_Tracer_InvalidateDependency static const char * func_event_name(PyFunction_WatchEvent event) { @@ -1151,6 +1151,10 @@ func_dealloc(PyObject *self) if (_PyObject_ResurrectEnd(self)) { return; } +#if _Py_TIER2 + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1); + _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self); +#endif _PyObject_GC_UNTRACK(op); FT_CLEAR_WEAKREFS(self, op->func_weakreflist); (void)func_clear((PyObject*)op); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ebd9ebdfce1bb..2c798855a71f55 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2938,8 +2938,8 @@ dummy_func( JUMP_BACKWARD_JIT, }; - tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) { - #if ENABLE_SPECIALIZATION_FT + specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) { + #if ENABLE_SPECIALIZATION if (this_instr->op.code == JUMP_BACKWARD) { uint8_t desired = tstate->interp->jit ? 
JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired); @@ -2953,25 +2953,21 @@ dummy_func( tier1 op(_JIT, (--)) { #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { - _Py_CODEUNIT *start = this_instr; - /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && + this_instr->op.code == JUMP_BACKWARD_JIT && + next_instr->op.code != ENTER_EXECUTOR) { + /* Back up over EXTENDED_ARGs so executor is inserted at the correct place */ + _Py_CODEUNIT *insert_exec_at = this_instr; while (oparg > 255) { oparg >>= 8; - start--; + insert_exec_at--; } - _PyExecutorObject *executor; - int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); - if (optimized <= 0) { - this_instr[1].counter = restart_backoff_counter(counter); - ERROR_IF(optimized < 0); + int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, oparg); + if (succ) { + ENTER_TRACING(); } else { - this_instr[1].counter = initial_jump_backoff_counter(); - assert(tstate->current_executor == NULL); - assert(executor != tstate->interp->cold_executor); - tstate->jit_exit = NULL; - TIER1_TO_TIER2(executor); + this_instr[1].counter = restart_backoff_counter(counter); } } else { @@ -3017,6 +3013,10 @@ dummy_func( tier1 inst(ENTER_EXECUTOR, (--)) { #ifdef _Py_TIER2 + if (IS_JIT_TRACING()) { + next_instr = this_instr; + goto stop_tracing; + } PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -3078,7 +3078,7 @@ dummy_func( macro(POP_JUMP_IF_NOT_NONE) = unused/1 + _IS_NONE + _POP_JUMP_IF_FALSE; - tier1 inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { + replaced inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost * generator or coroutine, so we deliberately do not check it here. @@ -5245,21 +5245,42 @@ dummy_func( tier2 op(_EXIT_TRACE, (exit_p/4 --)) { _PyExitData *exit = (_PyExitData *)exit_p; #if defined(Py_DEBUG) && !defined(_Py_JIT) - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + const _Py_CODEUNIT *target = ((frame->owner == FRAME_OWNED_BY_INTERPRETER) + ? 
_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame)) + + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - if (frame->lltrace >= 2) { + if (frame->lltrace >= 3) { printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %tu, temp %d, target %d -> %s]\n", + printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); + _PyOpcode_OpName[target->op.code], exit->is_control_flow); } #endif tstate->jit_exit = exit; TIER2_TO_TIER2(exit->executor); } + tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) { + #if defined(Py_DEBUG) && !defined(_Py_JIT) + _PyExitData *exit = (_PyExitData *)exit_p; + _Py_CODEUNIT *target = frame->instr_ptr; + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (frame->lltrace >= 3) { + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %tu, temp %d, target %d -> %s]\n", + exit - current_executor->exits, exit->temperature.value_and_backoff, + (int)(target - _PyFrame_GetBytecode(frame)), + _PyOpcode_OpName[target->op.code]); + } + #endif + // Disabled for now (gh-139109) as it slows down dynamic code tremendously. + // Compile and jump to the cold dynamic executors in the future. + GOTO_TIER_ONE(frame->instr_ptr); + } + tier2 op(_CHECK_VALIDITY, (--)) { DEOPT_IF(!current_executor->vm_data.valid); } @@ -5369,7 +5390,8 @@ dummy_func( } tier2 op(_DEOPT, (--)) { - GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); + GOTO_TIER_ONE((frame->owner == FRAME_OWNED_BY_INTERPRETER) + ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame) + CURRENT_TARGET()); } tier2 op(_HANDLE_PENDING_AND_DEOPT, (--)) { @@ -5399,32 +5421,76 @@ dummy_func( tier2 op(_COLD_EXIT, ( -- )) { _PyExitData *exit = tstate->jit_exit; assert(exit != NULL); + assert(frame->owner < FRAME_OWNED_BY_INTERPRETER); _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); + assert(tstate->jit_exit == exit); + exit->executor = executor; + TIER2_TO_TIER2(exit->executor); } else { + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); assert(tstate->current_executor == (PyObject *)previous_executor); - int chain_depth = previous_executor->vm_data.chain_depth + 1; - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); + // For control-flow guards, we don't want to increase the chain depth, as those don't actually + // represent deopts but rather just normal programs! + int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow; + // Note: it's safe to use target->op.arg here instead of the oparg given by EXTENDED_ARG. + // The invariant in the optimizer is the deopt target always points back to the first EXTENDED_ARG. 
+ // So setting it to anything else is wrong. + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, target->op.arg); + exit->temperature = restart_backoff_counter(exit->temperature); + if (succ) { + GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = initial_temperature_backoff_counter(); + GOTO_TIER_ONE(target); + } + } + + tier2 op(_COLD_DYNAMIC_EXIT, ( -- )) { + // TODO (gh-139109): This should be similar to _COLD_EXIT in the future. + _Py_CODEUNIT *target = frame->instr_ptr; + GOTO_TIER_ONE(target); + } + + tier2 op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) { + _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(_PUSH_FRAME); + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += IP_OFFSET_OF(_PUSH_FRAME); + EXIT_IF(true); + } + } + + tier2 op(_GUARD_IP_YIELD_VALUE, (ip/4 --)) { + _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(YIELD_VALUE); + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += IP_OFFSET_OF(YIELD_VALUE); + EXIT_IF(true); + } + } + + tier2 op(_GUARD_IP_RETURN_VALUE, (ip/4 --)) { + _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(RETURN_VALUE); + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += IP_OFFSET_OF(RETURN_VALUE); + EXIT_IF(true); + } + } + + tier2 op(_GUARD_IP_RETURN_GENERATOR, (ip/4 --)) { + _Py_CODEUNIT *target = frame->instr_ptr + IP_OFFSET_OF(RETURN_GENERATOR); + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += IP_OFFSET_OF(RETURN_GENERATOR); + EXIT_IF(true); } - assert(tstate->jit_exit == exit); - exit->executor = executor; - TIER2_TO_TIER2(exit->executor); } label(pop_2_error) { @@ -5571,6 +5637,62 @@ dummy_func( DISPATCH(); } + label(record_previous_inst) { +#if _Py_TIER2 + assert(IS_JIT_TRACING()); + int opcode = next_instr->op.code; + bool stop_tracing = (opcode == WITH_EXCEPT_START || + opcode == RERAISE || opcode == CLEANUP_THROW || + opcode == PUSH_EXC_INFO || opcode == INTERPRETER_EXIT); + int full = !_PyJit_translate_single_bytecode_to_trace(tstate, frame, next_instr, stop_tracing); + if (full) { + LEAVE_TRACING(); + int err = stop_tracing_and_jit(tstate, frame); + ERROR_IF(err < 0); + DISPATCH_GOTO_NON_TRACING(); + } + // Super instructions. The instruction deopted, so there's a mismatch in what the stack expects + // in the optimizer. We have to reflect this in the trace correctly. + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND && + opcode == POP_TOP) || + (_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE && + opcode == STORE_FAST)) { + _tstate->jit_tracer_state.prev_state.instr_is_super = true; + } + else { + _tstate->jit_tracer_state.prev_state.instr = next_instr; + } + PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable); + if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) { + Py_SETREF(_tstate->jit_tracer_state.prev_state.instr_code, (PyCodeObject*)Py_NewRef((prev_code))); + } + + _tstate->jit_tracer_state.prev_state.instr_frame = frame; + _tstate->jit_tracer_state.prev_state.instr_oparg = oparg; + _tstate->jit_tracer_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ?
2 : STACK_LEVEL(); + if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { + (&next_instr[1])->counter = trigger_backoff_counter(); + } + DISPATCH_GOTO_NON_TRACING(); +#else + Py_FatalError("JIT label executed in non-jit build."); +#endif + } + + label(stop_tracing) { +#if _Py_TIER2 + assert(IS_JIT_TRACING()); + int opcode = next_instr->op.code; + _PyJit_translate_single_bytecode_to_trace(tstate, frame, NULL, true); + LEAVE_TRACING(); + int err = stop_tracing_and_jit(tstate, frame); + ERROR_IF(err < 0); + DISPATCH_GOTO_NON_TRACING(); +#else + Py_FatalError("JIT label executed in non-jit build."); +#endif + } // END BYTECODES // diff --git a/Python/ceval.c b/Python/ceval.c index 43e8ee71206566..b76c9ec28119d5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -443,7 +443,7 @@ int pthread_attr_destroy(pthread_attr_t *a) #endif static void -hardware_stack_limits(uintptr_t *top, uintptr_t *base) +hardware_stack_limits(uintptr_t *base, uintptr_t *top) { #ifdef WIN32 ULONG_PTR low, high; @@ -486,23 +486,86 @@ hardware_stack_limits(uintptr_t *top, uintptr_t *base) #endif } -void -_Py_InitializeRecursionLimits(PyThreadState *tstate) +static void +tstate_set_stack(PyThreadState *tstate, + uintptr_t base, uintptr_t top) { - uintptr_t top; - uintptr_t base; - hardware_stack_limits(&top, &base); + assert(base < top); + assert((top - base) >= _PyOS_MIN_STACK_SIZE); + #ifdef _Py_THREAD_SANITIZER // Thread sanitizer crashes if we use more than half the stack. uintptr_t stacksize = top - base; - base += stacksize/2; + base += stacksize / 2; #endif _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; _tstate->c_stack_top = top; _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; + +#ifndef NDEBUG + // Sanity checks + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit); + assert(ts->c_stack_soft_limit < ts->c_stack_top); +#endif +} + + +void +_Py_InitializeRecursionLimits(PyThreadState *tstate) +{ + uintptr_t base, top; + hardware_stack_limits(&base, &top); + assert(top != 0); + + tstate_set_stack(tstate, base, top); + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + ts->c_stack_init_base = base; + ts->c_stack_init_top = top; + + // Test the stack pointer +#if !defined(NDEBUG) && !defined(__wasi__) + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + assert(ts->c_stack_soft_limit < here_addr); + assert(here_addr < ts->c_stack_top); +#endif } + +int +PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, + void *stack_start_addr, size_t stack_size) +{ + if (stack_size < _PyOS_MIN_STACK_SIZE) { + PyErr_Format(PyExc_ValueError, + "stack_size must be at least %zu bytes", + _PyOS_MIN_STACK_SIZE); + return -1; + } + + uintptr_t base = (uintptr_t)stack_start_addr; + uintptr_t top = base + stack_size; + tstate_set_stack(tstate, base, top); + return 0; +} + + +void +PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) +{ + _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate; + if (ts->c_stack_init_top != 0) { + tstate_set_stack(tstate, + ts->c_stack_init_base, + ts->c_stack_init_top); + return; + } + + _Py_InitializeRecursionLimits(tstate); +} + + /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() if the recursion_depth reaches recursion_limit. 
*/ int @@ -941,6 +1004,8 @@ static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = { { .op.code = RESUME, .op.arg = RESUME_OPARG_DEPTH1_MASK | RESUME_AT_FUNC_START } }; +const _Py_CODEUNIT *_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR = (_Py_CODEUNIT*)&_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS; + #ifdef Py_DEBUG extern void _PyUOpPrint(const _PyUOpInstruction *uop); #endif @@ -988,6 +1053,43 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) } } +#if _Py_TIER2 +// Returns > 0 if a new executor was created, 0 if no executor was created, and -1 on error. +static int +stop_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame) +{ + int _is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL); + int err = 0; + if (!_PyErr_Occurred(tstate) && !_is_sys_tracing) { + err = _PyOptimizer_Optimize(frame, tstate); + } + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + // Deal with backoffs + _PyExitData *exit = _tstate->jit_tracer_state.initial_state.exit; + if (exit == NULL) { + // We hold a strong reference to the code object, so the instruction won't be freed. + if (err <= 0) { + _Py_BackoffCounter counter = _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter; + _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter); + } + else { + _tstate->jit_tracer_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter(); + } + } + else { + // Likewise, we hold a strong reference to the executor containing this exit, so the exit is guaranteed + // to be valid to access. + if (err <= 0) { + exit->temperature = restart_backoff_counter(exit->temperature); + } + else { + exit->temperature = initial_temperature_backoff_counter(); + } + } + _PyJit_FinalizeTracing(tstate); + return err; +} +#endif /* _PyEval_EvalFrameDefault is too large to optimize for speed with PGO on MSVC.
*/ @@ -1117,9 +1219,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int stack_pointer = _PyFrame_GetStackPointer(frame); #if _Py_TAIL_CALL_INTERP # if Py_STATS - return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, 0, lastopcode); + return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_handler_table, 0, lastopcode); # else - return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_table, 0); + return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, instruction_funcptr_handler_table, 0); # endif #else goto error; @@ -1128,9 +1230,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int #if _Py_TAIL_CALL_INTERP # if Py_STATS - return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_table, 0, lastopcode); + return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_handler_table, 0, lastopcode); # else - return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_table, 0); + return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_handler_table, 0); # endif #else goto start_frame; @@ -1172,7 +1274,9 @@ _PyTier2Interpreter( tier2_start: next_uop = current_executor->trace; - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); + assert(next_uop->opcode == _START_EXECUTOR || + next_uop->opcode == _COLD_EXIT || + next_uop->opcode == _COLD_DYNAMIC_EXIT); #undef LOAD_IP #define LOAD_IP(UNUSED) (void)0 @@ -1196,7 +1300,9 @@ _PyTier2Interpreter( uint64_t trace_uop_execution_counter = 0; #endif - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); + assert(next_uop->opcode == _START_EXECUTOR || + next_uop->opcode == _COLD_EXIT || + next_uop->opcode == _COLD_DYNAMIC_EXIT); tier2_dispatch: for (;;) { uopcode = next_uop->opcode; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index afdcbc563b2c60..05a2760671e847 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -93,11 +93,19 @@ # define Py_PRESERVE_NONE_CC __attribute__((preserve_none)) Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS); +# define DISPATCH_TABLE_VAR instruction_funcptr_table +# define DISPATCH_TABLE instruction_funcptr_handler_table +# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table # define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS) + # define DISPATCH_GOTO() \ do { \ Py_MUSTTAIL return (((py_tail_call_funcptr *)instruction_funcptr_table)[opcode])(TAIL_CALL_ARGS); \ } while (0) +# define DISPATCH_GOTO_NON_TRACING() \ + do { \ + Py_MUSTTAIL return (((py_tail_call_funcptr *)DISPATCH_TABLE)[opcode])(TAIL_CALL_ARGS); \ + } while (0) # define JUMP_TO_LABEL(name) \ do { \ Py_MUSTTAIL return (_TAIL_CALL_##name)(TAIL_CALL_ARGS); \ @@ -115,19 +123,36 @@ # endif # define LABEL(name) TARGET(name) #elif USE_COMPUTED_GOTOS +# define DISPATCH_TABLE_VAR opcode_targets +# define DISPATCH_TABLE opcode_targets_table +# define TRACING_DISPATCH_TABLE opcode_tracing_targets_table # define TARGET(op) TARGET_##op: # define DISPATCH_GOTO() goto *opcode_targets[opcode] +# define DISPATCH_GOTO_NON_TRACING() goto *DISPATCH_TABLE[opcode]; # define JUMP_TO_LABEL(name) goto name; # define JUMP_TO_PREDICTED(name) goto PREDICTED_##name; # define LABEL(name) name: #else # define TARGET(op) case op: TARGET_##op: # define DISPATCH_GOTO() goto dispatch_opcode +# define 
DISPATCH_GOTO_NON_TRACING() goto dispatch_opcode # define JUMP_TO_LABEL(name) goto name; # define JUMP_TO_PREDICTED(name) goto PREDICTED_##name; # define LABEL(name) name: #endif +#if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2 +# define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE) +# define ENTER_TRACING() \ + DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE; +# define LEAVE_TRACING() \ + DISPATCH_TABLE_VAR = DISPATCH_TABLE; +#else +# define IS_JIT_TRACING() (0) +# define ENTER_TRACING() +# define LEAVE_TRACING() +#endif + /* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */ #ifdef Py_DEBUG #define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \ @@ -164,11 +189,19 @@ do { \ DISPATCH_GOTO(); \ } +#define DISPATCH_NON_TRACING() \ + { \ + assert(frame->stackpointer == NULL); \ + NEXTOPARG(); \ + PRE_DISPATCH_GOTO(); \ + DISPATCH_GOTO_NON_TRACING(); \ + } + #define DISPATCH_SAME_OPARG() \ { \ opcode = next_instr->op.code; \ PRE_DISPATCH_GOTO(); \ - DISPATCH_GOTO(); \ + DISPATCH_GOTO_NON_TRACING(); \ } #define DISPATCH_INLINED(NEW_FRAME) \ @@ -280,6 +313,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { /* This takes a uint16_t instead of a _Py_BackoffCounter, * because it is used directly on the cache entry in generated code, * which is always an integral type. */ +// Force re-specialization when tracing a side exit to get good side exits. #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \ backoff_counter_triggers(forge_backoff_counter((COUNTER))) @@ -366,12 +400,19 @@ do { \ next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \ frame = tstate->current_frame; \ stack_pointer = _PyFrame_GetStackPointer(frame); \ + int keep_tracing_bit = (uintptr_t)next_instr & 1; \ + next_instr = (_Py_CODEUNIT *)(((uintptr_t)next_instr) & (~1)); \ if (next_instr == NULL) { \ /* gh-140104: The exception handler expects frame->instr_ptr to point after this_instr, not at this_instr!
*/ \ next_instr = frame->instr_ptr + 1; \ JUMP_TO_LABEL(error); \ } \ + if (keep_tracing_bit) { \ + assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state.prev_state.code_curr_size == 2); \ + ENTER_TRACING(); \ + DISPATCH_NON_TRACING(); \ + } \ DISPATCH(); \ } while (0) @@ -382,13 +423,23 @@ do { \ goto tier2_start; \ } while (0) -#define GOTO_TIER_ONE(TARGET) \ - do \ - { \ - tstate->current_executor = NULL; \ - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ - _PyFrame_SetStackPointer(frame, stack_pointer); \ - return TARGET; \ +#define GOTO_TIER_ONE_SETUP \ + tstate->current_executor = NULL; \ + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ + _PyFrame_SetStackPointer(frame, stack_pointer); + +#define GOTO_TIER_ONE(TARGET) \ + do \ + { \ + GOTO_TIER_ONE_SETUP \ + return (_Py_CODEUNIT *)(TARGET); \ + } while (0) + +#define GOTO_TIER_ONE_CONTINUE_TRACING(TARGET) \ + do \ + { \ + GOTO_TIER_ONE_SETUP \ + return (_Py_CODEUNIT *)(((uintptr_t)(TARGET))| 1); \ } while (0) #define CURRENT_OPARG() (next_uop[-1].oparg) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9ce0a9f8a4d87b..7ba2e9d0d92999 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4189,6 +4189,8 @@ break; } + /* _JUMP_BACKWARD_NO_INTERRUPT is not a viable micro-op for tier 2 because it is replaced */ + case _GET_LEN: { _PyStackRef obj; _PyStackRef len; @@ -7108,16 +7110,18 @@ PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); _PyExitData *exit = (_PyExitData *)exit_p; #if defined(Py_DEBUG) && !defined(_Py_JIT) - _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + const _Py_CODEUNIT *target = ((frame->owner == FRAME_OWNED_BY_INTERPRETER) + ? _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame)) + + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - if (frame->lltrace >= 2) { + if (frame->lltrace >= 3) { _PyFrame_SetStackPointer(frame, stack_pointer); printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); - printf(", exit %tu, temp %d, target %d -> %s]\n", + printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); + _PyOpcode_OpName[target->op.code], exit->is_control_flow); stack_pointer = _PyFrame_GetStackPointer(frame); } #endif @@ -7126,6 +7130,28 @@ break; } + case _DYNAMIC_EXIT: { + PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); + #if defined(Py_DEBUG) && !defined(_Py_JIT) + _PyExitData *exit = (_PyExitData *)exit_p; + _Py_CODEUNIT *target = frame->instr_ptr; + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (frame->lltrace >= 3) { + _PyFrame_SetStackPointer(frame, stack_pointer); + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %tu, temp %d, target %d -> %s]\n", + exit - current_executor->exits, exit->temperature.value_and_backoff, + (int)(target - _PyFrame_GetBytecode(frame)), + _PyOpcode_OpName[target->op.code]); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + #endif + + GOTO_TIER_ONE(frame->instr_ptr); + break; + } + case _CHECK_VALIDITY: { if (!current_executor->vm_data.valid) { UOP_STAT_INC(uopcode, miss); @@ -7419,7 +7445,8 @@ } case _DEOPT: { - GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); + GOTO_TIER_ONE((frame->owner == FRAME_OWNED_BY_INTERPRETER) + ? 
_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR : _PyFrame_GetBytecode(frame) + CURRENT_TARGET()); break; } @@ -7460,37 +7487,101 @@ case _COLD_EXIT: { _PyExitData *exit = tstate->jit_exit; assert(exit != NULL); + assert(frame->owner < FRAME_OWNED_BY_INTERPRETER); _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); + assert(tstate->jit_exit == exit); + exit->executor = executor; + TIER2_TO_TIER2(exit->executor); } else { - _PyFrame_SetStackPointer(frame, stack_pointer); + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); - stack_pointer = _PyFrame_GetStackPointer(frame); assert(tstate->current_executor == (PyObject *)previous_executor); - int chain_depth = previous_executor->vm_data.chain_depth + 1; - _PyFrame_SetStackPointer(frame, stack_pointer); - int optimized = _PyOptimizer_Optimize(frame, target, &executor, chain_depth); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - GOTO_TIER_ONE(optimized < 0 ? NULL : target); + int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow; + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, target->op.arg); + exit->temperature = restart_backoff_counter(exit->temperature); + if (succ) { + GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = initial_temperature_backoff_counter(); + GOTO_TIER_ONE(target); } - assert(tstate->jit_exit == exit); - exit->executor = executor; - TIER2_TO_TIER2(exit->executor); break; } + case _COLD_DYNAMIC_EXIT: { + _Py_CODEUNIT *target = frame->instr_ptr; + GOTO_TIER_ONE(target); + break; + } + + case _GUARD_IP__PUSH_FRAME: { + #define OFFSET_OF__PUSH_FRAME ((0)) + PyObject *ip = (PyObject *)CURRENT_OPERAND0(); + _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF__PUSH_FRAME; + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += OFFSET_OF__PUSH_FRAME; + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #undef OFFSET_OF__PUSH_FRAME + break; + } + + case _GUARD_IP_YIELD_VALUE: { + #define OFFSET_OF_YIELD_VALUE ((1+INLINE_CACHE_ENTRIES_SEND)) + PyObject *ip = (PyObject *)CURRENT_OPERAND0(); + _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_YIELD_VALUE; + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += OFFSET_OF_YIELD_VALUE; + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #undef OFFSET_OF_YIELD_VALUE + break; + } + + case _GUARD_IP_RETURN_VALUE: { + #define OFFSET_OF_RETURN_VALUE ((frame->return_offset)) + PyObject *ip = (PyObject *)CURRENT_OPERAND0(); + _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_RETURN_VALUE; + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += OFFSET_OF_RETURN_VALUE; + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #undef OFFSET_OF_RETURN_VALUE + break; + } + + case _GUARD_IP_RETURN_GENERATOR: { + #define OFFSET_OF_RETURN_GENERATOR ((frame->return_offset)) + 
PyObject *ip = (PyObject *)CURRENT_OPERAND0(); + _Py_CODEUNIT *target = frame->instr_ptr + OFFSET_OF_RETURN_GENERATOR; + if (target != (_Py_CODEUNIT *)ip) { + frame->instr_ptr += OFFSET_OF_RETURN_GENERATOR; + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #undef OFFSET_OF_RETURN_GENERATOR + break; + } + + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 79328a7b725613..a984da6dc912a2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5476,6 +5476,10 @@ INSTRUCTION_STATS(ENTER_EXECUTOR); opcode = ENTER_EXECUTOR; #ifdef _Py_TIER2 + if (IS_JIT_TRACING()) { + next_instr = this_instr; + JUMP_TO_LABEL(stop_tracing); + } PyCodeObject *code = _PyFrame_GetCode(frame); _PyExecutorObject *executor = code->co_executors->executors[oparg & 255]; assert(executor->vm_data.index == INSTR_OFFSET() - 1); @@ -7589,7 +7593,7 @@ /* Skip 1 cache entry */ // _SPECIALIZE_JUMP_BACKWARD { - #if ENABLE_SPECIALIZATION_FT + #if ENABLE_SPECIALIZATION if (this_instr->op.code == JUMP_BACKWARD) { uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT; FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired); @@ -7645,30 +7649,20 @@ { #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { - _Py_CODEUNIT *start = this_instr; + if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && + this_instr->op.code == JUMP_BACKWARD_JIT && + next_instr->op.code != ENTER_EXECUTOR) { + _Py_CODEUNIT *insert_exec_at = this_instr; while (oparg > 255) { oparg >>= 8; - start--; + insert_exec_at--; } - _PyExecutorObject *executor; - _PyFrame_SetStackPointer(frame, stack_pointer); - int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); - stack_pointer = _PyFrame_GetStackPointer(frame); - if (optimized <= 0) { - this_instr[1].counter = restart_backoff_counter(counter); - if (optimized < 0) { - JUMP_TO_LABEL(error); - } + int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, oparg); + if (succ) { + ENTER_TRACING(); } else { - _PyFrame_SetStackPointer(frame, stack_pointer); - this_instr[1].counter = initial_jump_backoff_counter(); - stack_pointer = _PyFrame_GetStackPointer(frame); - assert(tstate->current_executor == NULL); - assert(executor != tstate->interp->cold_executor); - tstate->jit_exit = NULL; - TIER1_TO_TIER2(executor); + this_instr[1].counter = restart_backoff_counter(counter); } } else { @@ -12265,5 +12259,75 @@ JUMP_TO_LABEL(error); DISPATCH(); } + LABEL(record_previous_inst) + { + #if _Py_TIER2 + assert(IS_JIT_TRACING()); + int opcode = next_instr->op.code; + bool stop_tracing = (opcode == WITH_EXCEPT_START || + opcode == RERAISE || opcode == CLEANUP_THROW || + opcode == PUSH_EXC_INFO || opcode == INTERPRETER_EXIT); + _PyFrame_SetStackPointer(frame, stack_pointer); + int full = !_PyJit_translate_single_bytecode_to_trace(tstate, frame, next_instr, stop_tracing); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (full) { + LEAVE_TRACING(); + _PyFrame_SetStackPointer(frame, stack_pointer); + int err = stop_tracing_and_jit(tstate, frame); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err < 0) { + JUMP_TO_LABEL(error); + } + DISPATCH_GOTO_NON_TRACING(); + } + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND && + opcode == 
POP_TOP) || + (_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE && + opcode == STORE_FAST)) { + _tstate->jit_tracer_state.prev_state.instr_is_super = true; + } + else { + _tstate->jit_tracer_state.prev_state.instr = next_instr; + } + PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable); + if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) { + _PyFrame_SetStackPointer(frame, stack_pointer); + Py_SETREF(_tstate->jit_tracer_state.prev_state.instr_code, (PyCodeObject*)Py_NewRef((prev_code))); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + _tstate->jit_tracer_state.prev_state.instr_frame = frame; + _tstate->jit_tracer_state.prev_state.instr_oparg = oparg; + _tstate->jit_tracer_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL(); + if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { + (&next_instr[1])->counter = trigger_backoff_counter(); + } + DISPATCH_GOTO_NON_TRACING(); + #else + Py_FatalError("JIT label executed in non-jit build."); + #endif + } + + LABEL(stop_tracing) + { + #if _Py_TIER2 + assert(IS_JIT_TRACING()); + int opcode = next_instr->op.code; + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyJit_translate_single_bytecode_to_trace(tstate, frame, NULL, true); + stack_pointer = _PyFrame_GetStackPointer(frame); + LEAVE_TRACING(); + _PyFrame_SetStackPointer(frame, stack_pointer); + int err = stop_tracing_and_jit(tstate, frame); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (err < 0) { + JUMP_TO_LABEL(error); + } + DISPATCH_GOTO_NON_TRACING(); + #else + Py_FatalError("JIT label executed in non-jit build."); + #endif + } + /* END LABELS */ #undef TIER_ONE diff --git a/Python/instrumentation.c b/Python/instrumentation.c index b4b2bc5dc69f9d..81e46a331e0b9e 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -18,6 +18,7 @@ #include "pycore_tuple.h" // _PyTuple_FromArraySteal() #include "opcode_ids.h" +#include "pycore_optimizer.h" /* Uncomment this to dump debugging output when assertions fail */ @@ -1785,6 +1786,7 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) _PyCode_Clear_Executors(code); } _Py_Executors_InvalidateDependency(interp, code, 1); + _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code); #endif int code_len = (int)Py_SIZE(code); /* Exit early to avoid creating instrumentation diff --git a/Python/jit.c b/Python/jit.c index 279e1ce6a0d2e5..7ab0f8ddd430dd 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -604,7 +604,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz unsigned char *code = memory; state.trampolines.mem = memory + code_size; unsigned char *data = memory + code_size + state.trampolines.size + code_padding; - assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); + assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT || trace[0].opcode == _COLD_DYNAMIC_EXIT); for (size_t i = 0; i < length; i++) { const _PyUOpInstruction *instruction = &trace[i]; group = &stencil_groups[instruction->opcode]; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 6dd443e1655ed0..1b9196503b570b 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -257,8 +257,270 @@ static void *opcode_targets_table[256] = { &&TARGET_INSTRUMENTED_LINE, &&TARGET_ENTER_EXECUTOR, }; +#if _Py_TIER2 +static void *opcode_tracing_targets_table[256] = { + &&record_previous_inst, + 
&&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + 
&&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, + &&record_previous_inst, +}; +#endif #else /* _Py_TAIL_CALL_INTERP */ -static py_tail_call_funcptr instruction_funcptr_table[256]; +static py_tail_call_funcptr instruction_funcptr_handler_table[256]; + +static py_tail_call_funcptr instruction_funcptr_tracing_table[256]; Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_2_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_pop_1_error(TAIL_CALL_PARAMS); @@ -266,6 +528,8 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_error(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_exception_unwind(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject 
*_TAIL_CALL_exit_unwind(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_start_frame(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_record_previous_inst(TAIL_CALL_PARAMS); +Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_stop_tracing(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_BINARY_OP(TAIL_CALL_PARAMS); Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_BINARY_OP_ADD_FLOAT(TAIL_CALL_PARAMS); @@ -503,7 +767,7 @@ Py_PRESERVE_NONE_CC static PyObject *_TAIL_CALL_UNKNOWN_OPCODE(TAIL_CALL_PARAMS) JUMP_TO_LABEL(error); } -static py_tail_call_funcptr instruction_funcptr_table[256] = { +static py_tail_call_funcptr instruction_funcptr_handler_table[256] = { [BINARY_OP] = _TAIL_CALL_BINARY_OP, [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_BINARY_OP_ADD_FLOAT, [BINARY_OP_ADD_INT] = _TAIL_CALL_BINARY_OP_ADD_INT, @@ -761,4 +1025,262 @@ static py_tail_call_funcptr instruction_funcptr_table[256] = { [232] = _TAIL_CALL_UNKNOWN_OPCODE, [233] = _TAIL_CALL_UNKNOWN_OPCODE, }; +static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = { + [BINARY_OP] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_ADD_FLOAT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_ADD_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_ADD_UNICODE] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_EXTEND] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_INPLACE_ADD_UNICODE] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_MULTIPLY_FLOAT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_MULTIPLY_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_DICT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_GETITEM] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_LIST_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_LIST_SLICE] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_STR_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBSCR_TUPLE_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBTRACT_FLOAT] = _TAIL_CALL_record_previous_inst, + [BINARY_OP_SUBTRACT_INT] = _TAIL_CALL_record_previous_inst, + [BINARY_SLICE] = _TAIL_CALL_record_previous_inst, + [BUILD_INTERPOLATION] = _TAIL_CALL_record_previous_inst, + [BUILD_LIST] = _TAIL_CALL_record_previous_inst, + [BUILD_MAP] = _TAIL_CALL_record_previous_inst, + [BUILD_SET] = _TAIL_CALL_record_previous_inst, + [BUILD_SLICE] = _TAIL_CALL_record_previous_inst, + [BUILD_STRING] = _TAIL_CALL_record_previous_inst, + [BUILD_TEMPLATE] = _TAIL_CALL_record_previous_inst, + [BUILD_TUPLE] = _TAIL_CALL_record_previous_inst, + [CACHE] = _TAIL_CALL_record_previous_inst, + [CALL] = _TAIL_CALL_record_previous_inst, + [CALL_ALLOC_AND_ENTER_INIT] = _TAIL_CALL_record_previous_inst, + [CALL_BOUND_METHOD_EXACT_ARGS] = _TAIL_CALL_record_previous_inst, + [CALL_BOUND_METHOD_GENERAL] = _TAIL_CALL_record_previous_inst, + [CALL_BUILTIN_CLASS] = _TAIL_CALL_record_previous_inst, + [CALL_BUILTIN_FAST] = _TAIL_CALL_record_previous_inst, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = _TAIL_CALL_record_previous_inst, + [CALL_BUILTIN_O] = _TAIL_CALL_record_previous_inst, + [CALL_FUNCTION_EX] = _TAIL_CALL_record_previous_inst, + [CALL_INTRINSIC_1] = _TAIL_CALL_record_previous_inst, + [CALL_INTRINSIC_2] = _TAIL_CALL_record_previous_inst, + [CALL_ISINSTANCE] = _TAIL_CALL_record_previous_inst, + [CALL_KW] = _TAIL_CALL_record_previous_inst, + [CALL_KW_BOUND_METHOD] = _TAIL_CALL_record_previous_inst, + [CALL_KW_NON_PY] = _TAIL_CALL_record_previous_inst, + [CALL_KW_PY] = _TAIL_CALL_record_previous_inst, + [CALL_LEN] = 
_TAIL_CALL_record_previous_inst, + [CALL_LIST_APPEND] = _TAIL_CALL_record_previous_inst, + [CALL_METHOD_DESCRIPTOR_FAST] = _TAIL_CALL_record_previous_inst, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = _TAIL_CALL_record_previous_inst, + [CALL_METHOD_DESCRIPTOR_NOARGS] = _TAIL_CALL_record_previous_inst, + [CALL_METHOD_DESCRIPTOR_O] = _TAIL_CALL_record_previous_inst, + [CALL_NON_PY_GENERAL] = _TAIL_CALL_record_previous_inst, + [CALL_PY_EXACT_ARGS] = _TAIL_CALL_record_previous_inst, + [CALL_PY_GENERAL] = _TAIL_CALL_record_previous_inst, + [CALL_STR_1] = _TAIL_CALL_record_previous_inst, + [CALL_TUPLE_1] = _TAIL_CALL_record_previous_inst, + [CALL_TYPE_1] = _TAIL_CALL_record_previous_inst, + [CHECK_EG_MATCH] = _TAIL_CALL_record_previous_inst, + [CHECK_EXC_MATCH] = _TAIL_CALL_record_previous_inst, + [CLEANUP_THROW] = _TAIL_CALL_record_previous_inst, + [COMPARE_OP] = _TAIL_CALL_record_previous_inst, + [COMPARE_OP_FLOAT] = _TAIL_CALL_record_previous_inst, + [COMPARE_OP_INT] = _TAIL_CALL_record_previous_inst, + [COMPARE_OP_STR] = _TAIL_CALL_record_previous_inst, + [CONTAINS_OP] = _TAIL_CALL_record_previous_inst, + [CONTAINS_OP_DICT] = _TAIL_CALL_record_previous_inst, + [CONTAINS_OP_SET] = _TAIL_CALL_record_previous_inst, + [CONVERT_VALUE] = _TAIL_CALL_record_previous_inst, + [COPY] = _TAIL_CALL_record_previous_inst, + [COPY_FREE_VARS] = _TAIL_CALL_record_previous_inst, + [DELETE_ATTR] = _TAIL_CALL_record_previous_inst, + [DELETE_DEREF] = _TAIL_CALL_record_previous_inst, + [DELETE_FAST] = _TAIL_CALL_record_previous_inst, + [DELETE_GLOBAL] = _TAIL_CALL_record_previous_inst, + [DELETE_NAME] = _TAIL_CALL_record_previous_inst, + [DELETE_SUBSCR] = _TAIL_CALL_record_previous_inst, + [DICT_MERGE] = _TAIL_CALL_record_previous_inst, + [DICT_UPDATE] = _TAIL_CALL_record_previous_inst, + [END_ASYNC_FOR] = _TAIL_CALL_record_previous_inst, + [END_FOR] = _TAIL_CALL_record_previous_inst, + [END_SEND] = _TAIL_CALL_record_previous_inst, + [ENTER_EXECUTOR] = _TAIL_CALL_record_previous_inst, + [EXIT_INIT_CHECK] = _TAIL_CALL_record_previous_inst, + [EXTENDED_ARG] = _TAIL_CALL_record_previous_inst, + [FORMAT_SIMPLE] = _TAIL_CALL_record_previous_inst, + [FORMAT_WITH_SPEC] = _TAIL_CALL_record_previous_inst, + [FOR_ITER] = _TAIL_CALL_record_previous_inst, + [FOR_ITER_GEN] = _TAIL_CALL_record_previous_inst, + [FOR_ITER_LIST] = _TAIL_CALL_record_previous_inst, + [FOR_ITER_RANGE] = _TAIL_CALL_record_previous_inst, + [FOR_ITER_TUPLE] = _TAIL_CALL_record_previous_inst, + [GET_AITER] = _TAIL_CALL_record_previous_inst, + [GET_ANEXT] = _TAIL_CALL_record_previous_inst, + [GET_AWAITABLE] = _TAIL_CALL_record_previous_inst, + [GET_ITER] = _TAIL_CALL_record_previous_inst, + [GET_LEN] = _TAIL_CALL_record_previous_inst, + [GET_YIELD_FROM_ITER] = _TAIL_CALL_record_previous_inst, + [IMPORT_FROM] = _TAIL_CALL_record_previous_inst, + [IMPORT_NAME] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_CALL] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_CALL_FUNCTION_EX] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_CALL_KW] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_END_ASYNC_FOR] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_END_FOR] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_END_SEND] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_FOR_ITER] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_INSTRUCTION] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_JUMP_BACKWARD] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_JUMP_FORWARD] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_LINE] = 
_TAIL_CALL_record_previous_inst, + [INSTRUMENTED_LOAD_SUPER_ATTR] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_NOT_TAKEN] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_POP_ITER] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_POP_JUMP_IF_FALSE] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_POP_JUMP_IF_NONE] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_POP_JUMP_IF_TRUE] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_RESUME] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_RETURN_VALUE] = _TAIL_CALL_record_previous_inst, + [INSTRUMENTED_YIELD_VALUE] = _TAIL_CALL_record_previous_inst, + [INTERPRETER_EXIT] = _TAIL_CALL_record_previous_inst, + [IS_OP] = _TAIL_CALL_record_previous_inst, + [JUMP_BACKWARD] = _TAIL_CALL_record_previous_inst, + [JUMP_BACKWARD_JIT] = _TAIL_CALL_record_previous_inst, + [JUMP_BACKWARD_NO_INTERRUPT] = _TAIL_CALL_record_previous_inst, + [JUMP_BACKWARD_NO_JIT] = _TAIL_CALL_record_previous_inst, + [JUMP_FORWARD] = _TAIL_CALL_record_previous_inst, + [LIST_APPEND] = _TAIL_CALL_record_previous_inst, + [LIST_EXTEND] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_CLASS] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_INSTANCE_VALUE] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_METHOD_LAZY_DICT] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_METHOD_NO_DICT] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_METHOD_WITH_VALUES] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_MODULE] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_PROPERTY] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_SLOT] = _TAIL_CALL_record_previous_inst, + [LOAD_ATTR_WITH_HINT] = _TAIL_CALL_record_previous_inst, + [LOAD_BUILD_CLASS] = _TAIL_CALL_record_previous_inst, + [LOAD_COMMON_CONSTANT] = _TAIL_CALL_record_previous_inst, + [LOAD_CONST] = _TAIL_CALL_record_previous_inst, + [LOAD_DEREF] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST_AND_CLEAR] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST_BORROW] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST_CHECK] = _TAIL_CALL_record_previous_inst, + [LOAD_FAST_LOAD_FAST] = _TAIL_CALL_record_previous_inst, + [LOAD_FROM_DICT_OR_DEREF] = _TAIL_CALL_record_previous_inst, + [LOAD_FROM_DICT_OR_GLOBALS] = _TAIL_CALL_record_previous_inst, + [LOAD_GLOBAL] = _TAIL_CALL_record_previous_inst, + [LOAD_GLOBAL_BUILTIN] = _TAIL_CALL_record_previous_inst, + [LOAD_GLOBAL_MODULE] = _TAIL_CALL_record_previous_inst, + [LOAD_LOCALS] = _TAIL_CALL_record_previous_inst, + [LOAD_NAME] = _TAIL_CALL_record_previous_inst, + [LOAD_SMALL_INT] = _TAIL_CALL_record_previous_inst, + [LOAD_SPECIAL] = _TAIL_CALL_record_previous_inst, + [LOAD_SUPER_ATTR] = _TAIL_CALL_record_previous_inst, + [LOAD_SUPER_ATTR_ATTR] = _TAIL_CALL_record_previous_inst, + [LOAD_SUPER_ATTR_METHOD] = _TAIL_CALL_record_previous_inst, + [MAKE_CELL] = _TAIL_CALL_record_previous_inst, + [MAKE_FUNCTION] = _TAIL_CALL_record_previous_inst, + [MAP_ADD] = _TAIL_CALL_record_previous_inst, + [MATCH_CLASS] = _TAIL_CALL_record_previous_inst, + [MATCH_KEYS] = 
_TAIL_CALL_record_previous_inst, + [MATCH_MAPPING] = _TAIL_CALL_record_previous_inst, + [MATCH_SEQUENCE] = _TAIL_CALL_record_previous_inst, + [NOP] = _TAIL_CALL_record_previous_inst, + [NOT_TAKEN] = _TAIL_CALL_record_previous_inst, + [POP_EXCEPT] = _TAIL_CALL_record_previous_inst, + [POP_ITER] = _TAIL_CALL_record_previous_inst, + [POP_JUMP_IF_FALSE] = _TAIL_CALL_record_previous_inst, + [POP_JUMP_IF_NONE] = _TAIL_CALL_record_previous_inst, + [POP_JUMP_IF_NOT_NONE] = _TAIL_CALL_record_previous_inst, + [POP_JUMP_IF_TRUE] = _TAIL_CALL_record_previous_inst, + [POP_TOP] = _TAIL_CALL_record_previous_inst, + [PUSH_EXC_INFO] = _TAIL_CALL_record_previous_inst, + [PUSH_NULL] = _TAIL_CALL_record_previous_inst, + [RAISE_VARARGS] = _TAIL_CALL_record_previous_inst, + [RERAISE] = _TAIL_CALL_record_previous_inst, + [RESERVED] = _TAIL_CALL_record_previous_inst, + [RESUME] = _TAIL_CALL_record_previous_inst, + [RESUME_CHECK] = _TAIL_CALL_record_previous_inst, + [RETURN_GENERATOR] = _TAIL_CALL_record_previous_inst, + [RETURN_VALUE] = _TAIL_CALL_record_previous_inst, + [SEND] = _TAIL_CALL_record_previous_inst, + [SEND_GEN] = _TAIL_CALL_record_previous_inst, + [SETUP_ANNOTATIONS] = _TAIL_CALL_record_previous_inst, + [SET_ADD] = _TAIL_CALL_record_previous_inst, + [SET_FUNCTION_ATTRIBUTE] = _TAIL_CALL_record_previous_inst, + [SET_UPDATE] = _TAIL_CALL_record_previous_inst, + [STORE_ATTR] = _TAIL_CALL_record_previous_inst, + [STORE_ATTR_INSTANCE_VALUE] = _TAIL_CALL_record_previous_inst, + [STORE_ATTR_SLOT] = _TAIL_CALL_record_previous_inst, + [STORE_ATTR_WITH_HINT] = _TAIL_CALL_record_previous_inst, + [STORE_DEREF] = _TAIL_CALL_record_previous_inst, + [STORE_FAST] = _TAIL_CALL_record_previous_inst, + [STORE_FAST_LOAD_FAST] = _TAIL_CALL_record_previous_inst, + [STORE_FAST_STORE_FAST] = _TAIL_CALL_record_previous_inst, + [STORE_GLOBAL] = _TAIL_CALL_record_previous_inst, + [STORE_NAME] = _TAIL_CALL_record_previous_inst, + [STORE_SLICE] = _TAIL_CALL_record_previous_inst, + [STORE_SUBSCR] = _TAIL_CALL_record_previous_inst, + [STORE_SUBSCR_DICT] = _TAIL_CALL_record_previous_inst, + [STORE_SUBSCR_LIST_INT] = _TAIL_CALL_record_previous_inst, + [SWAP] = _TAIL_CALL_record_previous_inst, + [TO_BOOL] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_ALWAYS_TRUE] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_BOOL] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_INT] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_LIST] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_NONE] = _TAIL_CALL_record_previous_inst, + [TO_BOOL_STR] = _TAIL_CALL_record_previous_inst, + [UNARY_INVERT] = _TAIL_CALL_record_previous_inst, + [UNARY_NEGATIVE] = _TAIL_CALL_record_previous_inst, + [UNARY_NOT] = _TAIL_CALL_record_previous_inst, + [UNPACK_EX] = _TAIL_CALL_record_previous_inst, + [UNPACK_SEQUENCE] = _TAIL_CALL_record_previous_inst, + [UNPACK_SEQUENCE_LIST] = _TAIL_CALL_record_previous_inst, + [UNPACK_SEQUENCE_TUPLE] = _TAIL_CALL_record_previous_inst, + [UNPACK_SEQUENCE_TWO_TUPLE] = _TAIL_CALL_record_previous_inst, + [WITH_EXCEPT_START] = _TAIL_CALL_record_previous_inst, + [YIELD_VALUE] = _TAIL_CALL_record_previous_inst, + [121] = _TAIL_CALL_UNKNOWN_OPCODE, + [122] = _TAIL_CALL_UNKNOWN_OPCODE, + [123] = _TAIL_CALL_UNKNOWN_OPCODE, + [124] = _TAIL_CALL_UNKNOWN_OPCODE, + [125] = _TAIL_CALL_UNKNOWN_OPCODE, + [126] = _TAIL_CALL_UNKNOWN_OPCODE, + [127] = _TAIL_CALL_UNKNOWN_OPCODE, + [210] = _TAIL_CALL_UNKNOWN_OPCODE, + [211] = _TAIL_CALL_UNKNOWN_OPCODE, + [212] = _TAIL_CALL_UNKNOWN_OPCODE, + [213] = _TAIL_CALL_UNKNOWN_OPCODE, + [214] = _TAIL_CALL_UNKNOWN_OPCODE, + 
[215] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [216] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [217] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [218] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [219] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [220] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [221] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [222] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [223] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [224] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [225] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [226] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [227] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [228] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [229] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [230] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [231] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [232] = _TAIL_CALL_UNKNOWN_OPCODE,
+    [233] = _TAIL_CALL_UNKNOWN_OPCODE,
+};
 #endif /* _Py_TAIL_CALL_INTERP */
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 3b7e2dafab85bb..65007a256d0c3b 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -29,11 +29,24 @@

 #define MAX_EXECUTORS_SIZE 256

+// A trace that is too short to make progress is just:
+// _START_EXECUTOR
+// _MAKE_WARM
+// _CHECK_VALIDITY
+// _SET_IP
+// which is 4-5 instructions (the _SET_IP may be omitted).
+#define CODE_SIZE_NO_PROGRESS 5
+// Every trace starts with _START_EXECUTOR and _MAKE_WARM.
+#define CODE_SIZE_EMPTY 2
+
 #define _PyExecutorObject_CAST(op)  ((_PyExecutorObject *)(op))

 static bool
 has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
 {
+    if (code == (PyCodeObject *)&_Py_InitCleanup) {
+        return false;
+    }
     if (instr->op.code == ENTER_EXECUTOR) {
         return true;
     }
@@ -100,11 +113,11 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO
 }

 static _PyExecutorObject *
-make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies);
+make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies, int chain_depth);

 static int
-uop_optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *instr,
-             _PyExecutorObject **exec_ptr, int curr_stackentries,
+uop_optimize(_PyInterpreterFrame *frame, PyThreadState *tstate,
+             _PyExecutorObject **exec_ptr,
              bool progress_needed);

 /* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
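
Reviewer note: the hunks below rework _PyOptimizer_Optimize() and
uop_optimize() to pull their inputs out of a per-thread tracer state instead
of taking them as parameters. As a minimal sketch, assuming the struct shape
only from the field accesses visible in this diff (the real definitions live
in internal headers this diff does not show), the state being threaded through
looks roughly like:

// Sketch only -- reconstructed from usage below, not quoted from CPython headers.
struct jit_tracer_state_sketch {
    _PyUOpInstruction *code_buffer;       // lazily allocated, UOP_BUFFER_SIZE
    struct {
        PyCodeObject *code;               // code object the trace started in
        PyFunctionObject *func;           // strong refs, cleared on finalize
        _Py_CODEUNIT *start_instr;        // where the executor gets inserted
        _Py_CODEUNIT *close_loop_instr;   // loop head that closes the trace
        _Py_CODEUNIT *jump_backward_instr;
        _PyExitData *exit;                // side exit to patch, or NULL
        int stack_depth;
        int chain_depth;                  // depth % MAX_CHAIN_DEPTH == 0 => must progress
    } initial_state;
    struct {
        _Py_CODEUNIT *instr;              // last bytecode recorded
        PyCodeObject *instr_code;         // its code object (strong ref)
        _PyInterpreterFrame *instr_frame;
        int instr_oparg;
        int instr_stacklevel;
        bool instr_is_super;              // swallow second half of a super-instruction
        int code_curr_size;               // uops emitted so far
        int code_max_size;                // remaining budget in the buffer
        _PyBloomFilter dependencies;      // for executor invalidation
        bool dependencies_still_valid;
    } prev_state;
};

The tier-1 record_previous_inst label fills prev_state one bytecode at a time,
and _PyOptimizer_Optimize() then compiles whatever has accumulated, which is
why the one-shot translate_bytecode_to_trace() walk disappears further down.
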
@@ -113,10 +126,10 @@ uop_optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *instr, // gh-137573: inlining this function causes stack overflows Py_NO_INLINE int _PyOptimizer_Optimize( - _PyInterpreterFrame *frame, _Py_CODEUNIT *start, - _PyExecutorObject **executor_ptr, int chain_depth) + _PyInterpreterFrame *frame, PyThreadState *tstate) { - _PyStackRef *stack_pointer = frame->stackpointer; + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + int chain_depth = _tstate->jit_tracer_state.initial_state.chain_depth; PyInterpreterState *interp = _PyInterpreterState_GET(); if (!interp->jit) { // gh-140936: It is possible that interp->jit will become false during @@ -126,7 +139,9 @@ _PyOptimizer_Optimize( return 0; } assert(!interp->compiling); + assert(_tstate->jit_tracer_state.initial_state.stack_depth >= 0); #ifndef Py_GIL_DISABLED + assert(_tstate->jit_tracer_state.initial_state.func != NULL); interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long @@ -134,18 +149,24 @@ _PyOptimizer_Optimize( // this is true, since a deopt won't infinitely re-enter the executor: chain_depth %= MAX_CHAIN_DEPTH; bool progress_needed = chain_depth == 0; - PyCodeObject *code = _PyFrame_GetCode(frame); - assert(PyCode_Check(code)); + PyCodeObject *code = (PyCodeObject *)_tstate->jit_tracer_state.initial_state.code; + _Py_CODEUNIT *start = _tstate->jit_tracer_state.initial_state.start_instr; if (progress_needed && !has_space_for_executor(code, start)) { interp->compiling = false; return 0; } - int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); + // One of our dependencies while tracing was invalidated. Not worth compiling. + if (!_tstate->jit_tracer_state.prev_state.dependencies_still_valid) { + interp->compiling = false; + return 0; + } + _PyExecutorObject *executor; + int err = uop_optimize(frame, tstate, &executor, progress_needed); if (err <= 0) { interp->compiling = false; return err; } - assert(*executor_ptr != NULL); + assert(executor != NULL); if (progress_needed) { int index = get_index_for_executor(code, start); if (index < 0) { @@ -155,17 +176,21 @@ _PyOptimizer_Optimize( * If an optimizer has already produced an executor, * it might get confused by the executor disappearing, * but there is not much we can do about that here. 
*/ - Py_DECREF(*executor_ptr); + Py_DECREF(executor); interp->compiling = false; return 0; } - insert_executor(code, start, index, *executor_ptr); + insert_executor(code, start, index, executor); } else { - (*executor_ptr)->vm_data.code = NULL; + executor->vm_data.code = NULL; + } + _PyExitData *exit = _tstate->jit_tracer_state.initial_state.exit; + if (exit != NULL) { + exit->executor = executor; } - (*executor_ptr)->vm_data.chain_depth = chain_depth; - assert((*executor_ptr)->vm_data.valid); + executor->vm_data.chain_depth = chain_depth; + assert(executor->vm_data.valid); interp->compiling = false; return 1; #else @@ -474,6 +499,14 @@ BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP, }; +static const uint16_t +guard_ip_uop[MAX_UOP_ID + 1] = { + [_PUSH_FRAME] = _GUARD_IP__PUSH_FRAME, + [_RETURN_GENERATOR] = _GUARD_IP_RETURN_GENERATOR, + [_RETURN_VALUE] = _GUARD_IP_RETURN_VALUE, + [_YIELD_VALUE] = _GUARD_IP_YIELD_VALUE, +}; + #define CONFIDENCE_RANGE 1000 #define CONFIDENCE_CUTOFF 333 @@ -530,64 +563,19 @@ add_to_trace( DPRINTF(2, "No room for %s (need %d, got %d)\n", \ (opname), (n), max_length - trace_length); \ OPT_STAT_INC(trace_too_long); \ - goto done; \ - } - -// Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE -#define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode)) - -// Trace stack operations (used by _PUSH_FRAME, _RETURN_VALUE) -#define TRACE_STACK_PUSH() \ - if (trace_stack_depth >= TRACE_STACK_SIZE) { \ - DPRINTF(2, "Trace stack overflow\n"); \ - OPT_STAT_INC(trace_stack_overflow); \ - return 0; \ - } \ - assert(func == NULL || func->func_code == (PyObject *)code); \ - trace_stack[trace_stack_depth].func = func; \ - trace_stack[trace_stack_depth].code = code; \ - trace_stack[trace_stack_depth].instr = instr; \ - trace_stack_depth++; -#define TRACE_STACK_POP() \ - if (trace_stack_depth <= 0) { \ - Py_FatalError("Trace stack underflow\n"); \ - } \ - trace_stack_depth--; \ - func = trace_stack[trace_stack_depth].func; \ - code = trace_stack[trace_stack_depth].code; \ - assert(func == NULL || func->func_code == (PyObject *)code); \ - instr = trace_stack[trace_stack_depth].instr; - -/* Returns the length of the trace on success, - * 0 if it failed to produce a worthwhile trace, - * and -1 on an error. + goto full; \ + } + + +/* Returns 1 on success (added to trace), 0 on trace end. 
*/ -static int -translate_bytecode_to_trace( +int +_PyJit_translate_single_bytecode_to_trace( + PyThreadState *tstate, _PyInterpreterFrame *frame, - _Py_CODEUNIT *instr, - _PyUOpInstruction *trace, - int buffer_size, - _PyBloomFilter *dependencies, bool progress_needed) + _Py_CODEUNIT *next_instr, + bool stop_tracing) { - bool first = true; - PyCodeObject *code = _PyFrame_GetCode(frame); - PyFunctionObject *func = _PyFrame_GetFunction(frame); - assert(PyFunction_Check(func)); - PyCodeObject *initial_code = code; - _Py_BloomFilter_Add(dependencies, initial_code); - _Py_CODEUNIT *initial_instr = instr; - int trace_length = 0; - // Leave space for possible trailing _EXIT_TRACE - int max_length = buffer_size-2; - struct { - PyFunctionObject *func; - PyCodeObject *code; - _Py_CODEUNIT *instr; - } trace_stack[TRACE_STACK_SIZE]; - int trace_stack_depth = 0; - int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions - bool jump_seen = false; #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -596,410 +584,468 @@ translate_bytecode_to_trace( lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that } #endif - - DPRINTF(2, - "Optimizing %s (%s:%d) at byte offset %d\n", - PyUnicode_AsUTF8(code->co_qualname), - PyUnicode_AsUTF8(code->co_filename), - code->co_firstlineno, - 2 * INSTR_IP(initial_instr, code)); - ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code)); - ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0); + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + PyCodeObject *old_code = _tstate->jit_tracer_state.prev_state.instr_code; + bool progress_needed = (_tstate->jit_tracer_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0; + _PyBloomFilter *dependencies = &_tstate->jit_tracer_state.prev_state.dependencies; + int trace_length = _tstate->jit_tracer_state.prev_state.code_curr_size; + _PyUOpInstruction *trace = _tstate->jit_tracer_state.code_buffer; + int max_length = _tstate->jit_tracer_state.prev_state.code_max_size; + + _Py_CODEUNIT *this_instr = _tstate->jit_tracer_state.prev_state.instr; + _Py_CODEUNIT *target_instr = this_instr; uint32_t target = 0; - for (;;) { - target = INSTR_IP(instr, code); - // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT - max_length-=2; - uint32_t opcode = instr->op.code; - uint32_t oparg = instr->op.arg; - - if (!first && instr == initial_instr) { - // We have looped around to the start: - RESERVE(1); - ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0); - goto done; + target = Py_IsNone((PyObject *)old_code) + ? (int)(target_instr - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR) + : INSTR_IP(target_instr, old_code); + + // Rewind EXTENDED_ARG so that we see the whole thing. + // We must point to the first EXTENDED_ARG when deopting. 
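+    // Each EXTENDED_ARG supplies 8 extra bits of oparg, so the rewind loop
+    // below steps the recorded target back one code unit per extra byte.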
+ int oparg = _tstate->jit_tracer_state.prev_state.instr_oparg; + int opcode = this_instr->op.code; + int rewind_oparg = oparg; + while (rewind_oparg > 255) { + rewind_oparg >>= 8; + target--; + } + + int old_stack_level = _tstate->jit_tracer_state.prev_state.instr_stacklevel; + + // Strange control-flow + bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) && + (next_instr != this_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]); + + /* Special case the first instruction, + * so that we can guarantee forward progress */ + if (progress_needed && _tstate->jit_tracer_state.prev_state.code_curr_size < CODE_SIZE_NO_PROGRESS) { + if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { + opcode = _PyOpcode_Deopt[opcode]; } + assert(!OPCODE_HAS_EXIT(opcode)); + assert(!OPCODE_HAS_DEOPT(opcode)); + } - DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); + bool needs_guard_ip = OPCODE_HAS_NEEDS_GUARD_IP(opcode); + if (has_dynamic_jump_taken && !needs_guard_ip) { + DPRINTF(2, "Unsupported: dynamic jump taken %s\n", _PyOpcode_OpName[opcode]); + goto unsupported; + } - if (opcode == EXTENDED_ARG) { - instr++; - opcode = instr->op.code; - oparg = (oparg << 8) | instr->op.arg; - if (opcode == EXTENDED_ARG) { - instr--; - goto done; + int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL); + if (is_sys_tracing) { + goto full; + } + + if (stop_tracing) { + ADD_TO_TRACE(_DEOPT, 0, 0, target); + goto done; + } + + DPRINTF(2, "%p %d: %s(%d) %d %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip, old_stack_level); + +#ifdef Py_DEBUG + if (oparg > 255) { + assert(_Py_GetBaseCodeUnit(old_code, target).op.code == EXTENDED_ARG); + } +#endif + + // Skip over super instructions. + if (_tstate->jit_tracer_state.prev_state.instr_is_super) { + _tstate->jit_tracer_state.prev_state.instr_is_super = false; + return 1; + } + + if (opcode == ENTER_EXECUTOR) { + goto full; + } + + if (!_tstate->jit_tracer_state.prev_state.dependencies_still_valid) { + goto done; + } + + // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls + // If we haven't guarded the IP, then it's untraceable. + if (frame != _tstate->jit_tracer_state.prev_state.instr_frame && !needs_guard_ip) { + DPRINTF(2, "Unsupported: unguardable jump taken\n"); + goto unsupported; + } + + if (oparg > 0xFFFF) { + DPRINTF(2, "Unsupported: oparg too large\n"); + goto unsupported; + } + + // TODO (gh-140277): The constituent use one extra stack slot. So we need to check for headroom. + if (opcode == BINARY_OP_SUBSCR_GETITEM && old_stack_level + 1 > old_code->co_stacksize) { + unsupported: + { + // Rewind to previous instruction and replace with _EXIT_TRACE. + _PyUOpInstruction *curr = &trace[trace_length-1]; + while (curr->opcode != _SET_IP && trace_length > 2) { + trace_length--; + curr = &trace[trace_length-1]; + } + assert(curr->opcode == _SET_IP || trace_length == 2); + if (curr->opcode == _SET_IP) { + int32_t old_target = (int32_t)uop_get_target(curr); + curr++; + trace_length++; + curr->opcode = _EXIT_TRACE; + curr->format = UOP_FORMAT_TARGET; + curr->target = old_target; } - } - if (opcode == ENTER_EXECUTOR) { - // We have a couple of options here. We *could* peek "underneath" - // this executor and continue tracing, which could give us a longer, - // more optimizeable trace (at the expense of lots of duplicated - // tier two code). 
Instead, we choose to just end here and stitch to - // the other trace, which allows a side-exit traces to rejoin the - // "main" trace periodically (and also helps protect us against - // pathological behavior where the amount of tier two code explodes - // for a medium-length, branchy code path). This seems to work - // better in practice, but in the future we could be smarter about - // what we do here: goto done; } - assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); - RESERVE_RAW(2, "_CHECK_VALIDITY"); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); - if (!OPCODE_HAS_NO_SAVE_IP(opcode)) { - RESERVE_RAW(2, "_SET_IP"); - ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)instr, target); - } + } - /* Special case the first instruction, - * so that we can guarantee forward progress */ - if (first && progress_needed) { - assert(first); - if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) { - opcode = _PyOpcode_Deopt[opcode]; - } - assert(!OPCODE_HAS_EXIT(opcode)); - assert(!OPCODE_HAS_DEOPT(opcode)); - } + if (opcode == NOP) { + return 1; + } - if (OPCODE_HAS_EXIT(opcode)) { - // Make space for side exit and final _EXIT_TRACE: - RESERVE_RAW(2, "_EXIT_TRACE"); - max_length--; - } - if (OPCODE_HAS_ERROR(opcode)) { - // Make space for error stub and final _EXIT_TRACE: - RESERVE_RAW(2, "_ERROR_POP_N"); - max_length--; - } - switch (opcode) { - case POP_JUMP_IF_NONE: - case POP_JUMP_IF_NOT_NONE: - case POP_JUMP_IF_FALSE: - case POP_JUMP_IF_TRUE: - { - RESERVE(1); - int counter = instr[1].cache; - int bitcount = _Py_popcount32(counter); - int jump_likely = bitcount > 8; - /* If bitcount is 8 (half the jumps were taken), adjust confidence by 50%. - For values in between, adjust proportionally. */ - if (jump_likely) { - confidence = confidence * bitcount / 16; - } - else { - confidence = confidence * (16 - bitcount) / 16; - } - uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; - DPRINTF(2, "%d: %s(%d): counter=%04x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n", - target, _PyOpcode_OpName[opcode], oparg, - counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode)); - if (confidence < CONFIDENCE_CUTOFF) { - DPRINTF(2, "Confidence too low (%d < %d)\n", confidence, CONFIDENCE_CUTOFF); - OPT_STAT_INC(low_confidence); - goto done; - } - _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - _Py_CODEUNIT *target_instr = next_instr + oparg; - if (jump_likely) { - DPRINTF(2, "Jump likely (%04x = %d bits), continue at byte offset %d\n", - instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code)); - instr = target_instr; - ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(next_instr, code)); - goto top; - } - ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, code)); - break; - } + if (opcode == JUMP_FORWARD) { + return 1; + } - case JUMP_BACKWARD: - case JUMP_BACKWARD_JIT: - ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); - _Py_FALLTHROUGH; - case JUMP_BACKWARD_NO_INTERRUPT: - { - instr += 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] - (int)oparg; - if (jump_seen) { - OPT_STAT_INC(inner_loop); - DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n"); - goto done; - } - jump_seen = true; - goto top; - } + if (opcode == EXTENDED_ARG) { + return 1; + } - case JUMP_FORWARD: - { - RESERVE(0); - // This will emit two _SET_IP instructions; leave it to the optimizer - instr += oparg; - break; - } + // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT + max_length -= 2; - case RESUME: - /* Use a special tier 2 version of RESUME_CHECK to 
allow traces to - * start with RESUME_CHECK */ - ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target); - break; + const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; - default: - { - const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; - if (expansion->nuops > 0) { - // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) - int nuops = expansion->nuops; - RESERVE(nuops + 1); /* One extra for exit */ - int16_t last_op = expansion->uops[nuops-1].uop; - if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { - // Check for trace stack underflow now: - // We can't bail e.g. in the middle of - // LOAD_CONST + _RETURN_VALUE. - if (trace_stack_depth == 0) { - DPRINTF(2, "Trace stack underflow\n"); - OPT_STAT_INC(trace_stack_underflow); - return 0; - } - } - uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM - for (int i = 0; i < nuops; i++) { - oparg = orig_oparg; - uint32_t uop = expansion->uops[i].uop; - uint64_t operand = 0; - // Add one to account for the actual opcode/oparg pair: - int offset = expansion->uops[i].offset + 1; - switch (expansion->uops[i].size) { - case OPARG_SIMPLE: - assert(opcode != JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD); - break; - case OPARG_CACHE_1: - operand = read_u16(&instr[offset].cache); - break; - case OPARG_CACHE_2: - operand = read_u32(&instr[offset].cache); - break; - case OPARG_CACHE_4: - operand = read_u64(&instr[offset].cache); - break; - case OPARG_TOP: // First half of super-instr - oparg = orig_oparg >> 4; - break; - case OPARG_BOTTOM: // Second half of super-instr - oparg = orig_oparg & 0xF; - break; - case OPARG_SAVE_RETURN_OFFSET: // op=_SAVE_RETURN_OFFSET; oparg=return_offset - oparg = offset; - assert(uop == _SAVE_RETURN_OFFSET); - break; - case OPARG_REPLACED: - uop = _PyUOp_Replacements[uop]; - assert(uop != 0); - uint32_t next_inst = target + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + (oparg > 255); - if (uop == _TIER2_RESUME_CHECK) { - target = next_inst; - } -#ifdef Py_DEBUG - else { - uint32_t jump_target = next_inst + oparg; - assert(_Py_GetBaseCodeUnit(code, jump_target).op.code == END_FOR); - assert(_Py_GetBaseCodeUnit(code, jump_target+1).op.code == POP_ITER); - } -#endif - break; - case OPERAND1_1: - assert(trace[trace_length-1].opcode == uop); - operand = read_u16(&instr[offset].cache); - trace[trace_length-1].operand1 = operand; - continue; - case OPERAND1_2: - assert(trace[trace_length-1].opcode == uop); - operand = read_u32(&instr[offset].cache); - trace[trace_length-1].operand1 = operand; - continue; - case OPERAND1_4: - assert(trace[trace_length-1].opcode == uop); - operand = read_u64(&instr[offset].cache); - trace[trace_length-1].operand1 = operand; - continue; - default: - fprintf(stderr, - "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n", - opcode, oparg, nuops, i, - expansion->uops[i].size, - expansion->uops[i].offset); - Py_FatalError("garbled expansion"); - } + assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); + assert(!_PyErr_Occurred(tstate)); - if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { - TRACE_STACK_POP(); - /* Set the operand to the function or code object returned to, - * to assist optimization passes. (See _PUSH_FRAME below.) 
- */ - if (func != NULL) { - operand = (uintptr_t)func; - } - else if (code != NULL) { - operand = (uintptr_t)code | 1; - } - else { - operand = 0; - } - ADD_TO_TRACE(uop, oparg, operand, target); - DPRINTF(2, - "Returning to %s (%s:%d) at byte offset %d\n", - PyUnicode_AsUTF8(code->co_qualname), - PyUnicode_AsUTF8(code->co_filename), - code->co_firstlineno, - 2 * INSTR_IP(instr, code)); - goto top; - } - if (uop == _PUSH_FRAME) { - assert(i + 1 == nuops); - if (opcode == FOR_ITER_GEN || - opcode == LOAD_ATTR_PROPERTY || - opcode == BINARY_OP_SUBSCR_GETITEM || - opcode == SEND_GEN) - { - DPRINTF(2, "Bailing due to dynamic target\n"); - OPT_STAT_INC(unknown_callee); - return 0; - } - assert(_PyOpcode_Deopt[opcode] == CALL || _PyOpcode_Deopt[opcode] == CALL_KW); - int func_version_offset = - offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT) - // Add one to account for the actual opcode/oparg pair: - + 1; - uint32_t func_version = read_u32(&instr[func_version_offset].cache); - PyCodeObject *new_code = NULL; - PyFunctionObject *new_func = - _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code); - DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n", - (int)func_version, new_func, new_code); - if (new_code != NULL) { - if (new_code == code) { - // Recursive call, bail (we could be here forever). - DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n", - PyUnicode_AsUTF8(new_code->co_qualname), - PyUnicode_AsUTF8(new_code->co_filename), - new_code->co_firstlineno); - OPT_STAT_INC(recursive_call); - ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); - goto done; - } - if (new_code->co_version != func_version) { - // func.__code__ was updated. - // Perhaps it may happen again, so don't bother tracing. - // TODO: Reason about this -- is it better to bail or not? - DPRINTF(2, "Bailing because co_version != func_version\n"); - ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); - goto done; - } - // Increment IP to the return address - instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; - TRACE_STACK_PUSH(); - _Py_BloomFilter_Add(dependencies, new_code); - /* Set the operand to the callee's function or code object, - * to assist optimization passes. - * We prefer setting it to the function - * but if that's not available but the code is available, - * use the code, setting the low bit so the optimizer knows. - */ - if (new_func != NULL) { - operand = (uintptr_t)new_func; - } - else if (new_code != NULL) { - operand = (uintptr_t)new_code | 1; - } - else { - operand = 0; - } - ADD_TO_TRACE(uop, oparg, operand, target); - code = new_code; - func = new_func; - instr = _PyCode_CODE(code); - DPRINTF(2, - "Continuing in %s (%s:%d) at byte offset %d\n", - PyUnicode_AsUTF8(code->co_qualname), - PyUnicode_AsUTF8(code->co_filename), - code->co_firstlineno, - 2 * INSTR_IP(instr, code)); - goto top; + if (OPCODE_HAS_EXIT(opcode)) { + // Make space for side exit and final _EXIT_TRACE: + max_length--; + } + if (OPCODE_HAS_ERROR(opcode)) { + // Make space for error stub and final _EXIT_TRACE: + max_length--; + } + + // _GUARD_IP leads to an exit. + max_length -= needs_guard_ip; + + RESERVE_RAW(expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode)), "uop and various checks"); + + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); + + if (!OPCODE_HAS_NO_SAVE_IP(opcode)) { + ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target); + } + + // Can be NULL for the entry frame. 
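+    // Without a code object there is nothing to add to the bloom filter
+    // of dependencies.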
+ if (old_code != NULL) { + _Py_BloomFilter_Add(dependencies, old_code); + } + + switch (opcode) { + case POP_JUMP_IF_NONE: + case POP_JUMP_IF_NOT_NONE: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + { + _Py_CODEUNIT *computed_next_instr_without_modifiers = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + _Py_CODEUNIT *computed_next_instr = computed_next_instr_without_modifiers + (computed_next_instr_without_modifiers->op.code == NOT_TAKEN); + _Py_CODEUNIT *computed_jump_instr = computed_next_instr_without_modifiers + oparg; + assert(next_instr == computed_next_instr || next_instr == computed_jump_instr); + int jump_happened = computed_jump_instr == next_instr; + assert(jump_happened == (target_instr[1].cache & 1)); + uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened]; + ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code)); + break; + } + case JUMP_BACKWARD_JIT: + // This is possible as the JIT might have re-activated after it was disabled + case JUMP_BACKWARD_NO_JIT: + case JUMP_BACKWARD: + ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target); + _Py_FALLTHROUGH; + case JUMP_BACKWARD_NO_INTERRUPT: + { + if ((next_instr != _tstate->jit_tracer_state.initial_state.close_loop_instr) && + (next_instr != _tstate->jit_tracer_state.initial_state.start_instr) && + _tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS && + // For side exits, we don't want to terminate them early. + _tstate->jit_tracer_state.initial_state.exit == NULL && + // These are coroutines, and we want to unroll those usually. + opcode != JUMP_BACKWARD_NO_INTERRUPT) { + // We encountered a JUMP_BACKWARD but not to the top of our own loop. + // We don't want to continue tracing as we might get stuck in the + // inner loop. Instead, end the trace where the executor of the + // inner loop might start and let the traces rejoin. 
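+                // The exit is tagged as control flow via operand1, which is
+                // copied into _PyExitData.is_control_flow when the executor
+                // is built (see make_executor_from_uops below).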
+ OPT_STAT_INC(inner_loop); + ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); + trace[trace_length-1].operand1 = true; // is_control_flow + DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, + _tstate->jit_tracer_state.initial_state.close_loop_instr, _tstate->jit_tracer_state.initial_state.start_instr); + goto done; + } + break; + } + + case RESUME: + case RESUME_CHECK: + /* Use a special tier 2 version of RESUME_CHECK to allow traces to + * start with RESUME_CHECK */ + ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target); + break; + default: + { + const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; + // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) + int nuops = expansion->nuops; + if (nuops == 0) { + DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]); + goto unsupported; + } + assert(nuops > 0); + uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM + uint32_t orig_target = target; + for (int i = 0; i < nuops; i++) { + oparg = orig_oparg; + target = orig_target; + uint32_t uop = expansion->uops[i].uop; + uint64_t operand = 0; + // Add one to account for the actual opcode/oparg pair: + int offset = expansion->uops[i].offset + 1; + switch (expansion->uops[i].size) { + case OPARG_SIMPLE: + assert(opcode != _JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD); + break; + case OPARG_CACHE_1: + operand = read_u16(&this_instr[offset].cache); + break; + case OPARG_CACHE_2: + operand = read_u32(&this_instr[offset].cache); + break; + case OPARG_CACHE_4: + operand = read_u64(&this_instr[offset].cache); + break; + case OPARG_TOP: // First half of super-instr + assert(orig_oparg <= 255); + oparg = orig_oparg >> 4; + break; + case OPARG_BOTTOM: // Second half of super-instr + assert(orig_oparg <= 255); + oparg = orig_oparg & 0xF; + break; + case OPARG_SAVE_RETURN_OFFSET: // op=_SAVE_RETURN_OFFSET; oparg=return_offset + oparg = offset; + assert(uop == _SAVE_RETURN_OFFSET); + break; + case OPARG_REPLACED: + uop = _PyUOp_Replacements[uop]; + assert(uop != 0); + + uint32_t next_inst = target + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + if (uop == _TIER2_RESUME_CHECK) { + target = next_inst; + } + else { + int extended_arg = orig_oparg > 255; + uint32_t jump_target = next_inst + orig_oparg + extended_arg; + assert(_Py_GetBaseCodeUnit(old_code, jump_target).op.code == END_FOR); + assert(_Py_GetBaseCodeUnit(old_code, jump_target+1).op.code == POP_ITER); + if (is_for_iter_test[uop]) { + target = jump_target + 1; } - DPRINTF(2, "Bail, new_code == NULL\n"); - OPT_STAT_INC(unknown_callee); - return 0; } - - if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) { - assert(i + 1 == nuops); - _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - assert(next_instr->op.code == STORE_FAST); - operand = next_instr->op.arg; - // Skip the STORE_FAST: - instr++; + break; + case OPERAND1_1: + assert(trace[trace_length-1].opcode == uop); + operand = read_u16(&this_instr[offset].cache); + trace[trace_length-1].operand1 = operand; + continue; + case OPERAND1_2: + assert(trace[trace_length-1].opcode == uop); + operand = read_u32(&this_instr[offset].cache); + trace[trace_length-1].operand1 = operand; + continue; + case OPERAND1_4: + assert(trace[trace_length-1].opcode == uop); + operand = read_u64(&this_instr[offset].cache); + trace[trace_length-1].operand1 = operand; + continue; + default: + fprintf(stderr, + "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n", + opcode, oparg, nuops, i, + expansion->uops[i].size, + 
expansion->uops[i].offset); + Py_FatalError("garbled expansion"); + } + if (uop == _PUSH_FRAME || uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { + PyCodeObject *new_code = (PyCodeObject *)PyStackRef_AsPyObjectBorrow(frame->f_executable); + PyFunctionObject *new_func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + + operand = 0; + if (frame->owner < FRAME_OWNED_BY_INTERPRETER) { + // Don't add nested code objects to the dependency. + // It causes endless re-traces. + if (new_func != NULL && !Py_IsNone((PyObject*)new_func) && !(new_code->co_flags & CO_NESTED)) { + operand = (uintptr_t)new_func; + DPRINTF(2, "Adding %p func to op\n", (void *)operand); + _Py_BloomFilter_Add(dependencies, new_func); + } + else if (new_code != NULL && !Py_IsNone((PyObject*)new_code)) { + operand = (uintptr_t)new_code | 1; + DPRINTF(2, "Adding %p code to op\n", (void *)operand); + _Py_BloomFilter_Add(dependencies, new_code); } - - // All other instructions - ADD_TO_TRACE(uop, oparg, operand, target); } + ADD_TO_TRACE(uop, oparg, operand, target); + trace[trace_length - 1].operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame))); break; } - DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]); - OPT_UNSUPPORTED_OPCODE(opcode); - goto done; // Break out of loop - } // End default - - } // End switch (opcode) + if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) { + assert(i + 1 == nuops); + _Py_CODEUNIT *next = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + assert(next->op.code == STORE_FAST); + operand = next->op.arg; + } + // All other instructions + ADD_TO_TRACE(uop, oparg, operand, target); + } + break; + } // End default - instr++; - // Add cache size for opcode - instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + } // End switch (opcode) - if (opcode == CALL_LIST_APPEND) { - assert(instr->op.code == POP_TOP); - instr++; + if (needs_guard_ip) { + uint16_t guard_ip = guard_ip_uop[trace[trace_length-1].opcode]; + if (guard_ip == 0) { + DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[trace[trace_length-1].opcode]); + Py_UNREACHABLE(); } - top: - // Jump here after _PUSH_FRAME or likely branches. 
-        first = false;
-    }  // End for (;;)
-
+        ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
+    }
+    // Loop back to the start
+    int is_first_instr = _tstate->jit_tracer_state.initial_state.close_loop_instr == next_instr ||
+        _tstate->jit_tracer_state.initial_state.start_instr == next_instr;
+    if (is_first_instr && _tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS) {
+        if (needs_guard_ip) {
+            ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
+        }
+        ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
+        goto done;
+    }
+    DPRINTF(2, "Trace continuing\n");
+    _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+    return 1;
 done:
-    while (trace_stack_depth > 0) {
-        TRACE_STACK_POP();
-    }
-    assert(code == initial_code);
-    // Skip short traces where we can't even translate a single instruction:
-    if (first) {
-        OPT_STAT_INC(trace_too_short);
-        DPRINTF(2,
-                "No trace for %s (%s:%d) at byte offset %d (no progress)\n",
-                PyUnicode_AsUTF8(code->co_qualname),
-                PyUnicode_AsUTF8(code->co_filename),
-                code->co_firstlineno,
-                2 * INSTR_IP(initial_instr, code));
+    DPRINTF(2, "Trace done\n");
+    _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+    return 0;
+full:
+    DPRINTF(2, "Trace full\n");
+    if (!is_terminator(&_tstate->jit_tracer_state.code_buffer[trace_length-1])) {
+        // Undo the last few instructions.
+        trace_length = _tstate->jit_tracer_state.prev_state.code_curr_size;
+        max_length = _tstate->jit_tracer_state.prev_state.code_max_size;
+        // We previously reserved one slot; reclaim it for the _EXIT_TRACE.
+        max_length += 1;
+        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+        trace[trace_length-1].operand1 = true; // is_control_flow
+    }
+    _tstate->jit_tracer_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_tracer_state.prev_state.code_max_size = max_length;
+    return 0;
+}
+
+// Returns 1 if tracing was entered, 0 if not.
+int
+_PyJit_TryInitializeTracing(
+    PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr,
+    _Py_CODEUNIT *start_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth,
+    _PyExitData *exit, int oparg)
+{
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    // A recursive trace request.
+    // Don't trace into the inner call: it would stomp on the previous trace, causing endless retraces.
+    if (_tstate->jit_tracer_state.prev_state.code_curr_size > CODE_SIZE_EMPTY) {
         return 0;
     }
-    if (!is_terminator(&trace[trace_length-1])) {
-        /* Allow space for _EXIT_TRACE */
-        max_length += 2;
-        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+    if (oparg > 0xFFFF) {
+        return 0;
+    }
+    if (_tstate->jit_tracer_state.code_buffer == NULL) {
+        _tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+        if (_tstate->jit_tracer_state.code_buffer == NULL) {
+            // Don't error, just go to next instruction.
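+            // Tracing is strictly optional here: on allocation failure the
+            // caller just stays in the tier-1 interpreter, and the backoff
+            // counter will offer another chance to trace later.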
+ return 0; + } + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + if (func == NULL) { + return 0; + } + PyCodeObject *code = _PyFrame_GetCode(frame); +#ifdef Py_DEBUG + char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); + int lltrace = 0; + if (python_lltrace != NULL && *python_lltrace >= '0') { + lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that } - DPRINTF(1, - "Created a proto-trace for %s (%s:%d) at byte offset %d -- length %d\n", - PyUnicode_AsUTF8(code->co_qualname), - PyUnicode_AsUTF8(code->co_filename), - code->co_firstlineno, - 2 * INSTR_IP(initial_instr, code), - trace_length); - OPT_HIST(trace_length, trace_length_hist); - return trace_length; + DPRINTF(2, + "Tracing %s (%s:%d) at byte offset %d at chain depth %d\n", + PyUnicode_AsUTF8(code->co_qualname), + PyUnicode_AsUTF8(code->co_filename), + code->co_firstlineno, + 2 * INSTR_IP(close_loop_instr, code), + chain_depth); +#endif + + add_to_trace(_tstate->jit_tracer_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code)); + add_to_trace(_tstate->jit_tracer_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0); + _tstate->jit_tracer_state.prev_state.code_curr_size = CODE_SIZE_EMPTY; + + _tstate->jit_tracer_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH; + _tstate->jit_tracer_state.initial_state.start_instr = start_instr; + _tstate->jit_tracer_state.initial_state.close_loop_instr = close_loop_instr; + _tstate->jit_tracer_state.initial_state.code = (PyCodeObject *)Py_NewRef(code); + _tstate->jit_tracer_state.initial_state.func = (PyFunctionObject *)Py_NewRef(func); + _tstate->jit_tracer_state.initial_state.exit = exit; + _tstate->jit_tracer_state.initial_state.stack_depth = curr_stackdepth; + _tstate->jit_tracer_state.initial_state.chain_depth = chain_depth; + _tstate->jit_tracer_state.prev_state.instr_frame = frame; + _tstate->jit_tracer_state.prev_state.dependencies_still_valid = true; + _tstate->jit_tracer_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame)); + _tstate->jit_tracer_state.prev_state.instr = curr_instr; + _tstate->jit_tracer_state.prev_state.instr_frame = frame; + _tstate->jit_tracer_state.prev_state.instr_oparg = oparg; + _tstate->jit_tracer_state.prev_state.instr_stacklevel = curr_stackdepth; + _tstate->jit_tracer_state.prev_state.instr_is_super = false; + assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL)); + _tstate->jit_tracer_state.initial_state.jump_backward_instr = curr_instr; + + if (_PyOpcode_Caches[_PyOpcode_Deopt[close_loop_instr->op.code]]) { + close_loop_instr[1].counter = trigger_backoff_counter(); + } + _Py_BloomFilter_Init(&_tstate->jit_tracer_state.prev_state.dependencies); + return 1; +} + +void +_PyJit_FinalizeTracing(PyThreadState *tstate) +{ + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + Py_CLEAR(_tstate->jit_tracer_state.initial_state.code); + Py_CLEAR(_tstate->jit_tracer_state.initial_state.func); + Py_CLEAR(_tstate->jit_tracer_state.prev_state.instr_code); + _tstate->jit_tracer_state.prev_state.code_curr_size = CODE_SIZE_EMPTY; + _tstate->jit_tracer_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1; } + #undef RESERVE #undef RESERVE_RAW #undef INSTR_IP @@ -1018,20 +1064,21 @@ count_exits(_PyUOpInstruction *buffer, int length) int exit_count = 0; for (int i = 0; i < length; i++) { int opcode = buffer[i].opcode; - if (opcode == _EXIT_TRACE) { + if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) { exit_count++; } } return exit_count; } -static void 
-static void
-make_exit(_PyUOpInstruction *inst, int opcode, int target)
+static void
+make_exit(_PyUOpInstruction *inst, int opcode, int target, bool is_control_flow)
 {
     inst->opcode = opcode;
     inst->oparg = 0;
     inst->operand0 = 0;
     inst->format = UOP_FORMAT_TARGET;
     inst->target = target;
+    inst->operand1 = is_control_flow;
 #ifdef Py_STATS
     inst->execution_count = 0;
 #endif
@@ -1075,15 +1122,17 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
             exit_op = _HANDLE_PENDING_AND_DEOPT;
         }
         int32_t jump_target = target;
-        if (is_for_iter_test[opcode]) {
-            /* Target the POP_TOP immediately after the END_FOR,
-             * leaving only the iterator on the stack. */
-            int extended_arg = inst->oparg > 255;
-            int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
-            jump_target = next_inst + inst->oparg + 1;
+        if (
+            opcode == _GUARD_IP__PUSH_FRAME ||
+            opcode == _GUARD_IP_RETURN_VALUE ||
+            opcode == _GUARD_IP_YIELD_VALUE ||
+            opcode == _GUARD_IP_RETURN_GENERATOR
+        ) {
+            exit_op = _DYNAMIC_EXIT;
         }
+        bool is_control_flow = (opcode == _GUARD_IS_FALSE_POP || opcode == _GUARD_IS_TRUE_POP || is_for_iter_test[opcode]);
         if (jump_target != current_jump_target || current_exit_op != exit_op) {
-            make_exit(&buffer[next_spare], exit_op, jump_target);
+            make_exit(&buffer[next_spare], exit_op, jump_target, is_control_flow);
             current_exit_op = exit_op;
             current_jump_target = jump_target;
             current_jump = next_spare;
@@ -1099,7 +1148,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
             current_popped = popped;
             current_error = next_spare;
             current_error_target = target;
-            make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
+            make_exit(&buffer[next_spare], _ERROR_POP_N, 0, false);
             buffer[next_spare].operand0 = target;
             next_spare++;
         }
@@ -1157,7 +1206,9 @@ sanity_check(_PyExecutorObject *executor)
     }
     bool ended = false;
     uint32_t i = 0;
-    CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
+    CHECK(executor->trace[0].opcode == _START_EXECUTOR ||
+          executor->trace[0].opcode == _COLD_EXIT ||
+          executor->trace[0].opcode == _COLD_DYNAMIC_EXIT);
     for (; i < executor->code_size; i++) {
         const _PyUOpInstruction *inst = &executor->trace[i];
         uint16_t opcode = inst->opcode;
@@ -1189,7 +1240,8 @@ sanity_check(_PyExecutorObject *executor)
               opcode == _DEOPT ||
               opcode == _HANDLE_PENDING_AND_DEOPT ||
               opcode == _EXIT_TRACE ||
-              opcode == _ERROR_POP_N);
+              opcode == _ERROR_POP_N ||
+              opcode == _DYNAMIC_EXIT);
     }
 }
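
In prepare_for_execution() above, a guard only gets a fresh exit stub when its (jump_target, exit_op) pair differs from the previous one, so runs of guards that deopt to the same place share a single stub. A standalone illustration of that sharing pattern; the types and the printed output are invented for the sketch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative guard record: which exit opcode it needs, where it deopts to. */
typedef struct {
    int exit_op;
    int32_t target;
} guard_t;

/* Emit one stub per (exit_op, target) run instead of one per guard. */
static int
emit_exit_stubs(const guard_t *guards, int n)
{
    int stubs = 0;
    int current_op = -1;
    int32_t current_target = -1;
    for (int i = 0; i < n; i++) {
        if (guards[i].target != current_target || guards[i].exit_op != current_op) {
            /* A new stub is only needed when either property changes. */
            printf("stub %d: op=%d target=%d\n", stubs, guards[i].exit_op, guards[i].target);
            current_op = guards[i].exit_op;
            current_target = guards[i].target;
            stubs++;
        }
        /* else: point this guard at the stub emitted for the current run. */
    }
    return stubs;
}
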
@@ -1202,7 +1254,7 @@
  * and not a NOP.
  */
 static _PyExecutorObject *
-make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies)
+make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies, int chain_depth)
 {
     int exit_count = count_exits(buffer, length);
     _PyExecutorObject *executor = allocate_executor(exit_count, length);
@@ -1212,10 +1264,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
     /* Initialize exits */
     _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+    _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
+    executor->vm_data.chain_depth = chain_depth;
     for (int i = 0; i < exit_count; i++) {
         executor->exits[i].index = i;
         executor->exits[i].temperature = initial_temperature_backoff_counter();
-        executor->exits[i].executor = cold;
     }
     int next_exit = exit_count-1;
     _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@@ -1225,11 +1278,13 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
         int opcode = buffer[i].opcode;
         dest--;
         *dest = buffer[i];
-        assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE);
-        if (opcode == _EXIT_TRACE) {
+        if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) {
             _PyExitData *exit = &executor->exits[next_exit];
             exit->target = buffer[i].target;
             dest->operand0 = (uint64_t)exit;
+            exit->executor = opcode == _EXIT_TRACE ? cold : cold_dynamic;
+            exit->is_dynamic = (char)(opcode == _DYNAMIC_EXIT);
+            exit->is_control_flow = (char)buffer[i].operand1;
             next_exit--;
         }
     }
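
Each _EXIT_TRACE or _DYNAMIC_EXIT uop above gets a _PyExitData slot whose executor field initially points at one of two shared, immortal cold stubs, so taking a never-before-seen exit lands in generic cold code rather than a real trace. A simplified sketch of that bookkeeping, with stand-in types (the real struct carries more state, and temperature is a backoff counter rather than a plain integer):

#include <stdbool.h>
#include <stdint.h>

struct executor;   /* opaque stand-in for _PyExecutorObject */

typedef struct {
    uint32_t target;           /* bytecode offset to resume at on exit */
    uint16_t temperature;      /* countdown until this exit is "hot" */
    struct executor *executor; /* destination: a real trace or a cold stub */
    char is_dynamic;           /* unpredictable-IP exits use their own stub */
    char is_control_flow;
} exit_data_t;

/* Stub selection, as in the loop above: static guards share one cold
 * executor, IP guards share the cold *dynamic* executor. */
static void
init_exit(exit_data_t *exit, bool is_dynamic_exit,
          struct executor *cold, struct executor *cold_dynamic)
{
    exit->executor = is_dynamic_exit ? cold_dynamic : cold;
    exit->is_dynamic = (char)is_dynamic_exit;
}
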
@@ -1291,38 +1346,32 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length)
 static int
 uop_optimize(
     _PyInterpreterFrame *frame,
-    _Py_CODEUNIT *instr,
+    PyThreadState *tstate,
     _PyExecutorObject **exec_ptr,
-    int curr_stackentries,
     bool progress_needed)
 {
-    _PyBloomFilter dependencies;
-    _Py_BloomFilter_Init(&dependencies);
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->jit_uop_buffer == NULL) {
-        interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
-        if (interp->jit_uop_buffer == NULL) {
-            return 0;
-        }
-    }
-    _PyUOpInstruction *buffer = interp->jit_uop_buffer;
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    _PyBloomFilter *dependencies = &_tstate->jit_tracer_state.prev_state.dependencies;
+    _PyUOpInstruction *buffer = _tstate->jit_tracer_state.code_buffer;
     OPT_STAT_INC(attempts);
     char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
     bool is_noopt = true;
     if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
         is_noopt = false;
     }
-    int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies, progress_needed);
-    if (length <= 0) {
-        // Error or nothing translated
-        return length;
+    int curr_stackentries = _tstate->jit_tracer_state.initial_state.stack_depth;
+    int length = _tstate->jit_tracer_state.prev_state.code_curr_size;
+    if (length <= CODE_SIZE_NO_PROGRESS) {
+        return 0;
     }
+    assert(length > 0);
     assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
-        length = _Py_uop_analyze_and_optimize(frame, buffer,
-                                              length,
-                                              curr_stackentries, &dependencies);
+        length = _Py_uop_analyze_and_optimize(
+            _tstate->jit_tracer_state.initial_state.func,
+            buffer, length,
+            curr_stackentries, dependencies);
         if (length <= 0) {
             return length;
         }
@@ -1345,14 +1394,14 @@ uop_optimize(
     OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
     length = prepare_for_execution(buffer, length);
     assert(length <= UOP_MAX_TRACE_LENGTH);
-    _PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
+    _PyExecutorObject *executor = make_executor_from_uops(
+        buffer, length, dependencies, _tstate->jit_tracer_state.initial_state.chain_depth);
     if (executor == NULL) {
         return -1;
     }
     assert(length <= UOP_MAX_TRACE_LENGTH);
     // Check executor coldness
-    PyThreadState *tstate = PyThreadState_Get();
     // It's okay if this ends up going negative.
     if (--tstate->interp->executor_creation_counter == 0) {
         _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
     }
@@ -1539,6 +1588,35 @@ _PyExecutor_GetColdExecutor(void)
     return cold;
 }
 
+_PyExecutorObject *
+_PyExecutor_GetColdDynamicExecutor(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (interp->cold_dynamic_executor != NULL) {
+        assert(interp->cold_dynamic_executor->trace[0].opcode == _COLD_DYNAMIC_EXIT);
+        return interp->cold_dynamic_executor;
+    }
+    _PyExecutorObject *cold = allocate_executor(0, 1);
+    if (cold == NULL) {
+        Py_FatalError("Cannot allocate core JIT code");
+    }
+    ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_DYNAMIC_EXIT;
+#ifdef _Py_JIT
+    cold->jit_code = NULL;
+    cold->jit_size = 0;
+    // This is initialized to true so we can prevent the executor
+    // from being immediately detected as cold and invalidated.
+    cold->vm_data.warm = true;
+    if (_PyJIT_Compile(cold, cold->trace, 1)) {
+        Py_DECREF(cold);
+        Py_FatalError("Cannot allocate core JIT code");
+    }
+#endif
+    _Py_SetImmortal((PyObject *)cold);
+    interp->cold_dynamic_executor = cold;
+    return cold;
+}
+
 void
 _PyExecutor_ClearExit(_PyExitData *exit)
 {
@@ -1546,7 +1624,12 @@ _PyExecutor_ClearExit(_PyExitData *exit)
         return;
     }
     _PyExecutorObject *old = exit->executor;
-    exit->executor = _PyExecutor_GetColdExecutor();
+    if (exit->is_dynamic) {
+        exit->executor = _PyExecutor_GetColdDynamicExecutor();
+    }
+    else {
+        exit->executor = _PyExecutor_GetColdExecutor();
+    }
     Py_DECREF(old);
 }
 
@@ -1648,6 +1731,18 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
     _Py_Executors_InvalidateAll(interp, is_invalidation);
 }
 
+void
+_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
+{
+    _PyBloomFilter obj_filter;
+    _Py_BloomFilter_Init(&obj_filter);
+    _Py_BloomFilter_Add(&obj_filter, obj);
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if (bloom_filter_may_contain(&_tstate->jit_tracer_state.prev_state.dependencies, &obj_filter))
+    {
+        _tstate->jit_tracer_state.prev_state.dependencies_still_valid = false;
+    }
+}
 /* Invalidate all executors */
 void
 _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
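
_PyJit_Tracer_InvalidateDependency() above relies on the standard Bloom-filter property: a membership test may yield a false positive (harmless here, the in-progress trace is merely discarded and re-recorded) but never a false negative, which would leave a trace running against a mutated dependency. A toy version of the add/may-contain pair, with made-up sizes and hashing rather than CPython's parameters:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Toy Bloom filter over 128 bits; constants and hash are illustrative only. */
#define FILTER_WORDS 4
typedef struct { uint32_t bits[FILTER_WORDS]; } bloom_t;

static void bloom_init(bloom_t *b) { memset(b, 0, sizeof(*b)); }

static void
bloom_add(bloom_t *b, const void *obj)
{
    uintptr_t h = (uintptr_t)obj;
    /* Set a few pseudo-independent bits derived from the pointer. */
    for (int i = 0; i < 3; i++) {
        h = h * 2654435761u + 1;
        unsigned bit = (unsigned)(h % (FILTER_WORDS * 32));
        b->bits[bit / 32] |= (uint32_t)1 << (bit % 32);
    }
}

/* True if every bit of `needle` is set in `haystack`. False positives are
 * possible; false negatives are not. */
static bool
bloom_may_contain(const bloom_t *haystack, const bloom_t *needle)
{
    for (int i = 0; i < FILTER_WORDS; i++) {
        if ((haystack->bits[i] & needle->bits[i]) != needle->bits[i]) {
            return false;
        }
    }
    return true;
}
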
@@ -1777,7 +1872,7 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out)
 #ifdef Py_STATS
         fprintf(out, "    <tr><td port=\"i%d\">%s -- %" PRIu64 "</td></tr>\n", i, opname, inst->execution_count);
 #else
-        fprintf(out, "    <tr><td port=\"i%d\">%s</td></tr>\n", i, opname);
+        fprintf(out, "    <tr><td port=\"i%d\">%s op0=%" PRIu64 "</td></tr>\n", i, opname, inst->operand0);
 #endif
         if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) {
             break;
         }
@@ -1787,6 +1882,8 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out)
     fprintf(out, "]\n\n");
 
     /* Write all the outgoing edges */
+    _PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
+    _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor();
     for (uint32_t i = 0; i < executor->code_size; i++) {
         _PyUOpInstruction const *inst = &executor->trace[i];
         uint16_t flags = _PyUop_Flags[inst->opcode];
@@ -1797,10 +1894,10 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out)
         else if (flags & HAS_EXIT_FLAG) {
             assert(inst->format == UOP_FORMAT_JUMP);
             _PyUOpInstruction const *exit_inst = &executor->trace[inst->jump_target];
-            assert(exit_inst->opcode == _EXIT_TRACE);
+            assert(exit_inst->opcode == _EXIT_TRACE || exit_inst->opcode == _DYNAMIC_EXIT);
             exit = (_PyExitData *)exit_inst->operand0;
         }
-        if (exit != NULL && exit->executor != NULL) {
+        if (exit != NULL && exit->executor != cold && exit->executor != cold_dynamic) {
             fprintf(out, "executor_%p:i%d -> executor_%p:start\n", executor, i, exit->executor);
         }
         if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) {
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index a6add301ccb26c..8d7b734e17cb0b 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -142,8 +142,10 @@ incorrect_keys(PyObject *obj, uint32_t version)
 #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
 #define STACK_SIZE() ((int)(ctx->frame->stack_len))
 
+#define CURRENT_FRAME_IS_INIT_SHIM() (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup))
+
 #define WITHIN_STACK_BOUNDS() \
-    (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE())
+    (CURRENT_FRAME_IS_INIT_SHIM() || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
 
 #define GETLOCAL(idx) ((ctx->frame->locals[idx]))
 
@@ -267,7 +269,7 @@
 static PyCodeObject *
 get_current_code_object(JitOptContext *ctx)
 {
-    return (PyCodeObject *)ctx->frame->func->func_code;
+    return (PyCodeObject *)ctx->frame->code;
 }
 
 static PyObject *
@@ -298,10 +300,6 @@ optimize_uops(
     JitOptContext context;
     JitOptContext *ctx = &context;
     uint32_t opcode = UINT16_MAX;
-    int curr_space = 0;
-    int max_space = 0;
-    _PyUOpInstruction *first_valid_check_stack = NULL;
-    _PyUOpInstruction *corresponding_check_stack = NULL;
 
     // Make sure that watchers are set up
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -320,13 +318,18 @@ optimize_uops(
     ctx->frame = frame;
     _PyUOpInstruction *this_instr = NULL;
 
+    JitOptRef *stack_pointer = ctx->frame->stack_pointer;
+
     for (int i = 0; !ctx->done; i++) {
         assert(i < trace_len);
         this_instr = &trace[i];
 
         int oparg = this_instr->oparg;
         opcode = this_instr->opcode;
-        JitOptRef *stack_pointer = ctx->frame->stack_pointer;
+
+        if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+            stack_pointer = ctx->frame->stack_pointer;
+        }
 
 #ifdef Py_DEBUG
         if (get_lltrace() >= 3) {
@@ -345,9 +348,11 @@ optimize_uops(
             Py_UNREACHABLE();
         }
         assert(ctx->frame != NULL);
-        DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
-        ctx->frame->stack_pointer = stack_pointer;
-        assert(STACK_LEVEL() >= 0);
+        if (!CURRENT_FRAME_IS_INIT_SHIM()) {
+            DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
+            ctx->frame->stack_pointer = stack_pointer;
+            assert(STACK_LEVEL() >= 0);
+        }
     }
     if (ctx->out_of_space) {
         DPRINTF(3, "\n");
@@ -355,27 +360,21 @@ optimize_uops(
     }
     if (ctx->contradiction) {
         // Attempted to push a "bottom" (contradiction) symbol onto the stack.
-        // This means that the abstract interpreter has hit unreachable code.
+        // This means that the abstract interpreter has optimized the trace
+        // to an unreachable state.
         // We *could* generate an _EXIT_TRACE or _FATAL_ERROR here, but hitting
-        // bottom indicates type instability, so we are probably better off
+        // bottom usually indicates an optimizer bug, so we are probably better off
         // retrying later.
         DPRINTF(3, "\n");
         DPRINTF(1, "Hit bottom in abstract interpreter\n");
         _Py_uop_abstractcontext_fini(ctx);
+        OPT_STAT_INC(optimizer_contradiction);
         return 0;
     }
 
     /* Either reached the end or cannot optimize further, but there
     * would be no benefit in retrying later */
     _Py_uop_abstractcontext_fini(ctx);
-    if (first_valid_check_stack != NULL) {
-        assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
-        assert(max_space > 0);
-        assert(max_space <= INT_MAX);
-        assert(max_space <= INT32_MAX);
-        first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
-        first_valid_check_stack->operand0 = max_space;
-    }
     return trace_len;
 
 error:
@@ -460,6 +459,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
                     buffer[pc].opcode = _NOP;
                 }
                 break;
+            case _EXIT_TRACE:
             default:
             {
                 // Cancel out pushes and pops, repeatedly. So:
@@ -493,7 +493,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
                 }
                 /* _PUSH_FRAME doesn't escape or error, but it
                  * does need the IP for the return address */
-                bool needs_ip = opcode == _PUSH_FRAME;
+                bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE || opcode == _DYNAMIC_EXIT || opcode == _EXIT_TRACE);
                 if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                     needs_ip = true;
                     may_have_escaped = true;
@@ -503,10 +503,14 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
                     buffer[last_set_ip].opcode = _SET_IP;
                     last_set_ip = -1;
                 }
+                if (opcode == _EXIT_TRACE) {
+                    return pc + 1;
+                }
                 break;
             }
             case _JUMP_TO_TOP:
-            case _EXIT_TRACE:
+            case _DYNAMIC_EXIT:
+            case _DEOPT:
                 return pc + 1;
         }
     }
@@ -518,7 +522,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
 // > 0 - length of optimized trace
 int
 _Py_uop_analyze_and_optimize(
-    _PyInterpreterFrame *frame,
+    PyFunctionObject *func,
     _PyUOpInstruction *buffer,
     int length,
     int curr_stacklen,
@@ -528,8 +532,8 @@ _Py_uop_analyze_and_optimize(
     OPT_STAT_INC(optimizer_attempts);
 
     length = optimize_uops(
-        _PyFrame_GetFunction(frame), buffer,
-        length, curr_stacklen, dependencies);
+        func, buffer,
+        length, curr_stacklen, dependencies);
 
     if (length == 0) {
         return length;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index da3d3c96bc1d97..06fa8a4522a499 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -342,7 +342,6 @@ dummy_func(void) {
         int already_bool = optimize_to_bool(this_instr, ctx, value, &value);
         if (!already_bool) {
             sym_set_type(value, &PyBool_Type);
-            value = sym_new_truthiness(ctx, value, true);
         }
     }
 
@@ -752,8 +751,14 @@ dummy_func(void) {
     }
 
     op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame)) {
-        new_frame = PyJitRef_NULL;
-        ctx->done = true;
+        assert((this_instr + 2)->opcode == _PUSH_FRAME);
+        PyCodeObject *co = get_code_with_logging((this_instr + 2));
+        if (co == NULL) {
+            ctx->done = true;
+            break;
+        }
+
+        new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0));
     }
 
     op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
@@ -764,8 +769,20 @@ dummy_func(void) {
     }
 
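
The _CREATE_INIT_FRAME rewrite that follows pushes a shim frame (backed by the _Py_InitCleanup code object) under the real __init__ frame, and the reworked _Py_uop_frame_pop() in optimizer_symbols.c later in this patch swaps in a replacement frame whenever the recorded return code object does not match the frame being returned into. A self-contained sketch of that shadow frame-stack idea, with invented types and none of the symbolic state the real _Py_UOpsAbstractFrame tracks:

#include <assert.h>
#include <stddef.h>

#define MAX_DEPTH 16

typedef struct {
    const void *code;   /* which code object this abstract frame models */
    int stack_level;
} abstract_frame_t;

typedef struct {
    int depth;
    abstract_frame_t frames[MAX_DEPTH];
} frame_ctx_t;

/* Push returns NULL on overflow instead of asserting, matching the
 * frame_new() change in optimizer_symbols.c below. */
static abstract_frame_t *
push_frame(frame_ctx_t *ctx, const void *code)
{
    if (ctx->depth >= MAX_DEPTH) {
        return NULL;
    }
    abstract_frame_t *f = &ctx->frames[ctx->depth++];
    f->code = code;
    f->stack_level = 0;
    return f;
}

/* Pop checks that we return into the code we recorded; on a mismatch (or
 * underflow) the caller must rebuild the frame, as frame_pop() now does. */
static int
pop_frame(frame_ctx_t *ctx, const void *returning_into)
{
    assert(ctx->depth > 0);
    ctx->depth--;
    if (ctx->depth > 0 && ctx->frames[ctx->depth - 1].code == returning_into) {
        return 0;   /* matched: nothing to repair */
    }
    return 1;       /* mismatch or underflow */
}
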
     op(_CREATE_INIT_FRAME, (init, self, args[oparg] -- init_frame)) {
-        init_frame = PyJitRef_NULL;
-        ctx->done = true;
+        ctx->frame->stack_pointer = stack_pointer - oparg - 2;
+        _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0);
+        if (shim == NULL) {
+            break;
+        }
+        /* Push self onto stack of shim */
+        shim->stack[0] = self;
+        shim->stack_pointer++;
+        assert((int)(shim->stack_pointer - shim->stack) == 1);
+        ctx->frame = shim;
+        ctx->curr_frame_depth++;
+        assert((this_instr + 1)->opcode == _PUSH_FRAME);
+        PyCodeObject *co = get_code_with_logging((this_instr + 1));
+        init_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args-1, oparg+1));
     }
 
     op(_RETURN_VALUE, (retval -- res)) {
@@ -773,42 +790,65 @@ dummy_func(void) {
         JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
         DEAD(retval);
         SAVE_STACK();
-        PyCodeObject *co = get_current_code_object(ctx);
         ctx->frame->stack_pointer = stack_pointer;
-        frame_pop(ctx);
+        PyCodeObject *returning_code = get_code_with_logging(this_instr);
+        if (returning_code == NULL) {
+            ctx->done = true;
+            break;
+        }
+        int returning_stacklevel = this_instr->operand1;
+        if (ctx->curr_frame_depth >= 2) {
+            PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code;
+            if (expected_code == returning_code) {
+                assert((this_instr + 1)->opcode == _GUARD_IP_RETURN_VALUE);
+                REPLACE_OP((this_instr + 1), _NOP, 0, 0);
+            }
+        }
+        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            break;
+        }
         stack_pointer = ctx->frame->stack_pointer;
-        /* Stack space handling */
-        assert(corresponding_check_stack == NULL);
-        assert(co != NULL);
-        int framesize = co->co_framesize;
-        assert(framesize > 0);
-        assert(framesize <= curr_space);
-        curr_space -= framesize;
-
         RELOAD_STACK();
         res = temp;
     }
 
     op(_RETURN_GENERATOR, ( -- res)) {
         SYNC_SP();
-        PyCodeObject *co = get_current_code_object(ctx);
         ctx->frame->stack_pointer = stack_pointer;
-        frame_pop(ctx);
+        PyCodeObject *returning_code = get_code_with_logging(this_instr);
+        if (returning_code == NULL) {
+            ctx->done = true;
+            break;
+        }
+        _Py_BloomFilter_Add(dependencies, returning_code);
+        int returning_stacklevel = this_instr->operand1;
+        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            break;
+        }
         stack_pointer = ctx->frame->stack_pointer;
         res = sym_new_unknown(ctx);
-
-        /* Stack space handling */
-        assert(corresponding_check_stack == NULL);
-        assert(co != NULL);
-        int framesize = co->co_framesize;
-        assert(framesize > 0);
-        assert(framesize <= curr_space);
-        curr_space -= framesize;
     }
 
-    op(_YIELD_VALUE, (unused -- value)) {
-        value = sym_new_unknown(ctx);
+    op(_YIELD_VALUE, (retval -- value)) {
+        // Mimics PyStackRef_MakeHeapSafe in the interpreter.
+ JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + DEAD(retval); + SAVE_STACK(); + ctx->frame->stack_pointer = stack_pointer; + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + _Py_BloomFilter_Add(dependencies, returning_code); + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } + stack_pointer = ctx->frame->stack_pointer; + RELOAD_STACK(); + value = temp; } op(_GET_ITER, (iterable -- iter, index_or_null)) { @@ -835,8 +875,6 @@ dummy_func(void) { } op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) { - assert(corresponding_check_stack == NULL); - corresponding_check_stack = this_instr; } op (_CHECK_STACK_SPACE_OPERAND, (framesize/2 -- )) { @@ -848,38 +886,29 @@ dummy_func(void) { op(_PUSH_FRAME, (new_frame -- )) { SYNC_SP(); - ctx->frame->stack_pointer = stack_pointer; + if (!CURRENT_FRAME_IS_INIT_SHIM()) { + ctx->frame->stack_pointer = stack_pointer; + } ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; stack_pointer = ctx->frame->stack_pointer; uint64_t operand = this_instr->operand0; - if (operand == 0 || (operand & 1)) { - // It's either a code object or NULL + if (operand == 0) { ctx->done = true; break; } - PyFunctionObject *func = (PyFunctionObject *)operand; - PyCodeObject *co = (PyCodeObject *)func->func_code; - assert(PyFunction_Check(func)); - ctx->frame->func = func; - /* Stack space handling */ - int framesize = co->co_framesize; - assert(framesize > 0); - curr_space += framesize; - if (curr_space < 0 || curr_space > INT32_MAX) { - // won't fit in signed 32-bit int - ctx->done = true; - break; - } - max_space = curr_space > max_space ? curr_space : max_space; - if (first_valid_check_stack == NULL) { - first_valid_check_stack = corresponding_check_stack; + if (!(operand & 1)) { + PyFunctionObject *func = (PyFunctionObject *)operand; + // No need to re-add to dependencies here. Already + // handled by the tracer. + ctx->frame->func = func; } - else if (corresponding_check_stack) { - // delete all but the first valid _CHECK_STACK_SPACE - corresponding_check_stack->opcode = _NOP; + // Fixed calls don't need IP guards. 
+ if ((this_instr-1)->opcode == _SAVE_RETURN_OFFSET || + (this_instr-1)->opcode == _CREATE_INIT_FRAME) { + assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME); + REPLACE_OP(this_instr+1, _NOP, 0, 0); } - corresponding_check_stack = NULL; } op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) { @@ -1024,6 +1053,10 @@ dummy_func(void) { ctx->done = true; } + op(_DEOPT, (--)) { + ctx->done = true; + } + op(_REPLACE_WITH_TRUE, (value -- res)) { REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_True); res = sym_new_const(ctx, Py_True); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b08099d8e2fc3b..01263fe8c7a78f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -280,7 +280,6 @@ int already_bool = optimize_to_bool(this_instr, ctx, value, &value); if (!already_bool) { sym_set_type(value, &PyBool_Type); - value = sym_new_truthiness(ctx, value, true); } stack_pointer[-1] = value; break; @@ -1116,16 +1115,24 @@ JitOptRef temp = PyJitRef_StripReferenceInfo(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (ctx->curr_frame_depth >= 2) { + PyCodeObject *expected_code = ctx->frames[ctx->curr_frame_depth - 2].code; + if (expected_code == returning_code) { + assert((this_instr + 1)->opcode == _GUARD_IP_RETURN_VALUE); + REPLACE_OP((this_instr + 1), _NOP, 0, 0); + } + } + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } stack_pointer = ctx->frame->stack_pointer; - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; res = temp; stack_pointer[0] = res; stack_pointer += 1; @@ -1167,9 +1174,28 @@ } case _YIELD_VALUE: { + JitOptRef retval; JitOptRef value; - value = sym_new_unknown(ctx); - stack_pointer[-1] = value; + retval = stack_pointer[-1]; + JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + ctx->frame->stack_pointer = stack_pointer; + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + _Py_BloomFilter_Add(dependencies, returning_code); + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } + stack_pointer = ctx->frame->stack_pointer; + value = temp; + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -2103,6 +2129,8 @@ break; } + /* _JUMP_BACKWARD_NO_INTERRUPT is not a viable micro-op for tier 2 */ + case _GET_LEN: { JitOptRef obj; JitOptRef len; @@ -2557,8 +2585,6 @@ } case _CHECK_STACK_SPACE: { - assert(corresponding_check_stack == NULL); - corresponding_check_stack = this_instr; break; } @@ -2601,34 +2627,26 @@ new_frame = stack_pointer[-1]; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - ctx->frame->stack_pointer = stack_pointer; + if (!CURRENT_FRAME_IS_INIT_SHIM()) { + ctx->frame->stack_pointer = stack_pointer; + } ctx->frame = (_Py_UOpsAbstractFrame *)PyJitRef_Unwrap(new_frame); ctx->curr_frame_depth++; stack_pointer = ctx->frame->stack_pointer; uint64_t operand = this_instr->operand0; - if 
(operand == 0 || (operand & 1)) { + if (operand == 0) { ctx->done = true; break; } - PyFunctionObject *func = (PyFunctionObject *)operand; - PyCodeObject *co = (PyCodeObject *)func->func_code; - assert(PyFunction_Check(func)); - ctx->frame->func = func; - int framesize = co->co_framesize; - assert(framesize > 0); - curr_space += framesize; - if (curr_space < 0 || curr_space > INT32_MAX) { - ctx->done = true; - break; - } - max_space = curr_space > max_space ? curr_space : max_space; - if (first_valid_check_stack == NULL) { - first_valid_check_stack = corresponding_check_stack; + if (!(operand & 1)) { + PyFunctionObject *func = (PyFunctionObject *)operand; + ctx->frame->func = func; } - else if (corresponding_check_stack) { - corresponding_check_stack->opcode = _NOP; + if ((this_instr-1)->opcode == _SAVE_RETURN_OFFSET || + (this_instr-1)->opcode == _CREATE_INIT_FRAME) { + assert((this_instr+1)->opcode == _GUARD_IP__PUSH_FRAME); + REPLACE_OP(this_instr+1, _NOP, 0, 0); } - corresponding_check_stack = NULL; break; } @@ -2761,9 +2779,24 @@ } case _CREATE_INIT_FRAME: { + JitOptRef *args; + JitOptRef self; JitOptRef init_frame; - init_frame = PyJitRef_NULL; - ctx->done = true; + args = &stack_pointer[-oparg]; + self = stack_pointer[-1 - oparg]; + ctx->frame->stack_pointer = stack_pointer - oparg - 2; + _Py_UOpsAbstractFrame *shim = frame_new(ctx, (PyCodeObject *)&_Py_InitCleanup, 0, NULL, 0); + if (shim == NULL) { + break; + } + shim->stack[0] = self; + shim->stack_pointer++; + assert((int)(shim->stack_pointer - shim->stack) == 1); + ctx->frame = shim; + ctx->curr_frame_depth++; + assert((this_instr + 1)->opcode == _PUSH_FRAME); + PyCodeObject *co = get_code_with_logging((this_instr + 1)); + init_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args-1, oparg+1)); stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2948,8 +2981,13 @@ case _PY_FRAME_KW: { JitOptRef new_frame; - new_frame = PyJitRef_NULL; - ctx->done = true; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + PyCodeObject *co = get_code_with_logging((this_instr + 2)); + if (co == NULL) { + ctx->done = true; + break; + } + new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -3005,17 +3043,19 @@ case _RETURN_GENERATOR: { JitOptRef res; - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + _Py_BloomFilter_Add(dependencies, returning_code); + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } stack_pointer = ctx->frame->stack_pointer; res = sym_new_unknown(ctx); - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; stack_pointer[0] = res; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -3265,6 +3305,10 @@ break; } + case _DYNAMIC_EXIT: { + break; + } + case _CHECK_VALIDITY: { break; } @@ -3399,6 +3443,7 @@ } case _DEOPT: { + ctx->done = true; break; } @@ -3418,3 +3463,23 @@ break; } + case _COLD_DYNAMIC_EXIT: { + break; + } + + case _GUARD_IP__PUSH_FRAME: { + break; + } + + case _GUARD_IP_YIELD_VALUE: { + break; + } + + case _GUARD_IP_RETURN_VALUE: { 
+        break;
+    }
+
+    case _GUARD_IP_RETURN_GENERATOR: {
+        break;
+    }
+
diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c
index 01cff0b014cc7b..8a71eff465e5a3 100644
--- a/Python/optimizer_symbols.c
+++ b/Python/optimizer_symbols.c
@@ -817,9 +817,14 @@ _Py_uop_frame_new(
     JitOptRef *args,
     int arg_len)
 {
-    assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
+    if (ctx->curr_frame_depth >= MAX_ABSTRACT_FRAME_DEPTH) {
+        ctx->done = true;
+        ctx->out_of_space = true;
+        OPT_STAT_INC(optimizer_frame_overflow);
+        return NULL;
+    }
     _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
-
+    frame->code = co;
     frame->stack_len = co->co_stacksize;
     frame->locals_len = co->co_nlocalsplus;
 
@@ -901,13 +906,42 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
 }
 
 int
-_Py_uop_frame_pop(JitOptContext *ctx)
+_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
 {
     _Py_UOpsAbstractFrame *frame = ctx->frame;
     ctx->n_consumed = frame->locals;
+
     ctx->curr_frame_depth--;
-    assert(ctx->curr_frame_depth >= 1);
-    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+    if (ctx->curr_frame_depth >= 1) {
+        ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+        // We returned to the correct code. Nothing to do here.
+        if (co == ctx->frame->code) {
+            return 0;
+        }
+        // Else: the code we recorded doesn't match the code we *think* we're
+        // returning to. The trace could have come from anywhere, so we can't
+        // just return to the old frame. We have to restore what the tracer
+        // recorded as the traced next frame.
+        // Remove the current frame, and later swap it out with the right one.
+        else {
+            ctx->curr_frame_depth--;
+        }
+    }
+    // Else: trace stack underflow.
+
+    // This handles swapping out frames.
+    assert(curr_stackentries >= 1);
+    // -1 to stackentries as we push to the stack our return value after this.
+    _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0);
+    if (new_frame == NULL) {
+        ctx->done = true;
+        return 1;
+    }
+
+    ctx->curr_frame_depth++;
+    ctx->frame = new_frame;
     return 0;
 }
 
diff --git a/Python/pystate.c b/Python/pystate.c
index cf251c120d75af..c12a1418e74309 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -552,10 +552,6 @@ init_interpreter(PyInterpreterState *interp,
     _Py_brc_init_state(interp);
 #endif
 
-#ifdef _Py_TIER2
-    // Ensure the buffer is to be set as NULL.
-    interp->jit_uop_buffer = NULL;
-#endif
     llist_init(&interp->mem_free_queue.head);
     llist_init(&interp->asyncio_tasks_head);
     interp->asyncio_tasks_lock = (PyMutex){0};
@@ -805,10 +801,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 
 #ifdef _Py_TIER2
     _Py_ClearExecutorDeletionList(interp);
-    if (interp->jit_uop_buffer != NULL) {
-        _PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE);
-        interp->jit_uop_buffer = NULL;
-    }
 #endif
     _PyAST_Fini(interp);
     _PyAtExit_Fini(interp);
@@ -831,6 +823,14 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
         assert(cold->vm_data.warm);
         _PyExecutor_Free(cold);
     }
+
+    struct _PyExecutorObject *cold_dynamic = interp->cold_dynamic_executor;
+    if (cold_dynamic != NULL) {
+        interp->cold_dynamic_executor = NULL;
+        assert(cold_dynamic->vm_data.valid);
+        assert(cold_dynamic->vm_data.warm);
+        _PyExecutor_Free(cold_dynamic);
+    }
     /* We don't clear sysdict and builtins until the end of this function.
        Because clearing other attributes can execute arbitrary Python
       code which requires sysdict and builtins.
*/ @@ -1495,9 +1495,15 @@ init_threadstate(_PyThreadStateImpl *_tstate, _tstate->c_stack_top = 0; _tstate->c_stack_hard_limit = 0; + _tstate->c_stack_init_base = 0; + _tstate->c_stack_init_top = 0; + _tstate->asyncio_running_loop = NULL; _tstate->asyncio_running_task = NULL; +#ifdef _Py_TIER2 + _tstate->jit_tracer_state.code_buffer = NULL; +#endif tstate->delete_later = NULL; llist_init(&_tstate->mem_free_queue); @@ -1804,6 +1810,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) assert(tstate_impl->refcounts.values == NULL); #endif +#if _Py_TIER2 + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + if (_tstate->jit_tracer_state.code_buffer != NULL) { + _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE); + _tstate->jit_tracer_state.code_buffer = NULL; + } +#endif + HEAD_UNLOCK(runtime); // XXX Unbind in PyThreadState_Clear(), or earlier diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 4621ad250f4633..bd4a8cf0d3e65c 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -359,6 +359,7 @@ Parser/parser.c - soft_keywords - Parser/lexer/lexer.c - type_comment_prefix - Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - +Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR - Python/codecs.c - Py_hexdigits - Python/codecs.c - codecs_builtin_error_handlers - Python/codecs.c - ucnhash_capi - diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 9dd7e5dbfbae7b..d39013db4f7fd6 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -34,6 +34,8 @@ class Properties: side_exit: bool pure: bool uses_opcode: bool + needs_guard_ip: bool + unpredictable_jump: bool tier: int | None = None const_oparg: int = -1 needs_prev: bool = False @@ -75,6 +77,8 @@ def from_list(properties: list["Properties"]) -> "Properties": pure=all(p.pure for p in properties), needs_prev=any(p.needs_prev for p in properties), no_save_ip=all(p.no_save_ip for p in properties), + needs_guard_ip=any(p.needs_guard_ip for p in properties), + unpredictable_jump=any(p.unpredictable_jump for p in properties), ) @property @@ -102,6 +106,8 @@ def infallible(self) -> bool: side_exit=False, pure=True, no_save_ip=False, + needs_guard_ip=False, + unpredictable_jump=False, ) @@ -692,6 +698,11 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_Wrap", "PyStackRef_Unwrap", "_PyLong_CheckExactAndCompact", + "_PyExecutor_FromExit", + "_PyJit_TryInitializeTracing", + "_Py_unset_eval_breaker_bit", + "_Py_set_eval_breaker_bit", + "trigger_backoff_counter", ) @@ -882,6 +893,46 @@ def stmt_escapes(stmt: Stmt) -> bool: else: assert False, "Unexpected statement type" +def stmt_has_jump_on_unpredictable_path_body(stmts: list[Stmt] | None, branches_seen: int) -> tuple[bool, int]: + if not stmts: + return False, branches_seen + predict = False + seen = 0 + for st in stmts: + predict_body, seen_body = stmt_has_jump_on_unpredictable_path(st, branches_seen) + predict = predict or predict_body + seen += seen_body + return predict, seen + +def stmt_has_jump_on_unpredictable_path(stmt: Stmt, branches_seen: int) -> tuple[bool, int]: + if isinstance(stmt, BlockStmt): + return stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen) + elif isinstance(stmt, SimpleStmt): + for tkn in stmt.contents: + if tkn.text == "JUMPBY": + return True, branches_seen + return False, branches_seen + elif 
isinstance(stmt, IfStmt): + predict, seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen) + if stmt.else_body: + predict_else, seen_else = stmt_has_jump_on_unpredictable_path(stmt.else_body, branches_seen) + return predict != predict_else, seen + seen_else + 1 + return predict, seen + 1 + elif isinstance(stmt, MacroIfStmt): + predict, seen = stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen) + if stmt.else_body: + predict_else, seen_else = stmt_has_jump_on_unpredictable_path_body(stmt.else_body, branches_seen) + return predict != predict_else, seen + seen_else + return predict, seen + elif isinstance(stmt, ForStmt): + unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen) + return unpredictable, branches_seen + 1 + elif isinstance(stmt, WhileStmt): + unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen) + return unpredictable, branches_seen + 1 + else: + assert False, f"Unexpected statement type {stmt}" + def compute_properties(op: parser.CodeDef) -> Properties: escaping_calls = find_escaping_api_calls(op) @@ -909,6 +960,8 @@ def compute_properties(op: parser.CodeDef) -> Properties: escapes = stmt_escapes(op.block) pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations + unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(op.block, 0) + unpredictable_jump = False if isinstance(op, parser.LabelDef) else (unpredictable and branches_seen > 0) return Properties( escaping_calls=escaping_calls, escapes=escapes, @@ -932,6 +985,11 @@ def compute_properties(op: parser.CodeDef) -> Properties: no_save_ip=no_save_ip, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), + needs_guard_ip=(isinstance(op, parser.InstDef) + and (unpredictable_jump and "replaced" not in op.annotations)) + or variable_used(op, "LOAD_IP") + or variable_used(op, "DISPATCH_INLINED"), + unpredictable_jump=unpredictable_jump, ) def expand(items: list[StackItem], oparg: int) -> list[StackItem]: diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 61e855eb003706..0b5f764ec52b45 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -7,6 +7,7 @@ analysis_error, Label, CodeSection, + Uop, ) from cwriter import CWriter from typing import Callable, TextIO, Iterator, Iterable @@ -107,8 +108,9 @@ class Emitter: labels: dict[str, Label] _replacers: dict[str, ReplacementFunctionType] cannot_escape: bool + jump_prefix: str - def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False): + def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = False, jump_prefix: str = ""): self._replacers = { "EXIT_IF": self.exit_if, "AT_END_EXIT_IF": self.exit_if_after, @@ -131,6 +133,7 @@ def __init__(self, out: CWriter, labels: dict[str, Label], cannot_escape: bool = self.out = out self.labels = labels self.cannot_escape = cannot_escape + self.jump_prefix = jump_prefix def dispatch( self, @@ -167,7 +170,7 @@ def deopt_if( family_name = inst.family.name self.emit(f"UPDATE_MISS_STATS({family_name});\n") self.emit(f"assert(_PyOpcode_Deopt[opcode] == ({family_name}));\n") - self.emit(f"JUMP_TO_PREDICTED({family_name});\n") + self.emit(f"JUMP_TO_PREDICTED({self.jump_prefix}{family_name});\n") self.emit("}\n") return not always_true(first_tkn) @@ -198,10 
+201,10 @@ def exit_if_after( def goto_error(self, offset: int, storage: Storage) -> str: if offset > 0: - return f"JUMP_TO_LABEL(pop_{offset}_error);" + return f"{self.jump_prefix}JUMP_TO_LABEL(pop_{offset}_error);" if offset < 0: storage.copy().flush(self.out) - return f"JUMP_TO_LABEL(error);" + return f"{self.jump_prefix}JUMP_TO_LABEL(error);" def error_if( self, @@ -421,7 +424,7 @@ def goto_label(self, goto: Token, label: Token, storage: Storage) -> None: elif storage.spilled: raise analysis_error("Cannot jump from spilled label without reloading the stack pointer", goto) self.out.start_line() - self.out.emit("JUMP_TO_LABEL(") + self.out.emit(f"{self.jump_prefix}JUMP_TO_LABEL(") self.out.emit(label) self.out.emit(")") @@ -731,6 +734,10 @@ def cflags(p: Properties) -> str: flags.append("HAS_PURE_FLAG") if p.no_save_ip: flags.append("HAS_NO_SAVE_IP_FLAG") + if p.unpredictable_jump: + flags.append("HAS_UNPREDICTABLE_JUMP_FLAG") + if p.needs_guard_ip: + flags.append("HAS_NEEDS_GUARD_IP_FLAG") if flags: return " | ".join(flags) else: diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index b649b38123388d..21ae785a0ec445 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -56,6 +56,8 @@ "ERROR_NO_POP", "NO_SAVE_IP", "PERIODIC", + "UNPREDICTABLE_JUMP", + "NEEDS_GUARD_IP", ] @@ -201,7 +203,7 @@ def generate_metadata_table(analysis: Analysis, out: CWriter) -> None: out.emit("struct opcode_metadata {\n") out.emit("uint8_t valid_entry;\n") out.emit("uint8_t instr_format;\n") - out.emit("uint16_t flags;\n") + out.emit("uint32_t flags;\n") out.emit("};\n\n") out.emit( f"extern const struct opcode_metadata _PyOpcode_opcode_metadata[{table_size}];\n" diff --git a/Tools/cases_generator/target_generator.py b/Tools/cases_generator/target_generator.py index 324ef2773abe28..36fa1d7fa4908b 100644 --- a/Tools/cases_generator/target_generator.py +++ b/Tools/cases_generator/target_generator.py @@ -31,6 +31,16 @@ def write_opcode_targets(analysis: Analysis, out: CWriter) -> None: for target in targets: out.emit(target) out.emit("};\n") + targets = ["&&_unknown_opcode,\n"] * 256 + for name, op in analysis.opmap.items(): + if op < 256: + targets[op] = f"&&record_previous_inst,\n" + out.emit("#if _Py_TIER2\n") + out.emit("static void *opcode_tracing_targets_table[256] = {\n") + for target in targets: + out.emit(target) + out.emit("};\n") + out.emit(f"#endif\n") out.emit("#else /* _Py_TAIL_CALL_INTERP */\n") def function_proto(name: str) -> str: @@ -38,7 +48,9 @@ def function_proto(name: str) -> str: def write_tailcall_dispatch_table(analysis: Analysis, out: CWriter) -> None: - out.emit("static py_tail_call_funcptr instruction_funcptr_table[256];\n") + out.emit("static py_tail_call_funcptr instruction_funcptr_handler_table[256];\n") + out.emit("\n") + out.emit("static py_tail_call_funcptr instruction_funcptr_tracing_table[256];\n") out.emit("\n") # Emit function prototypes for labels. @@ -60,7 +72,7 @@ def write_tailcall_dispatch_table(analysis: Analysis, out: CWriter) -> None: out.emit("\n") # Emit the dispatch table. 
-    out.emit("static py_tail_call_funcptr instruction_funcptr_table[256] = {\n")
+    out.emit("static py_tail_call_funcptr instruction_funcptr_handler_table[256] = {\n")
     for name in sorted(analysis.instructions.keys()):
         out.emit(f"[{name}] = _TAIL_CALL_{name},\n")
     named_values = analysis.opmap.values()
@@ -68,6 +80,16 @@ def write_tailcall_dispatch_table(analysis: Analysis, out: CWriter) -> None:
         if rest not in named_values:
             out.emit(f"[{rest}] = _TAIL_CALL_UNKNOWN_OPCODE,\n")
     out.emit("};\n")
+
+    # Emit the tracing dispatch table.
+    out.emit("static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = {\n")
+    for name in sorted(analysis.instructions.keys()):
+        out.emit(f"[{name}] = _TAIL_CALL_record_previous_inst,\n")
+    named_values = analysis.opmap.values()
+    for rest in range(256):
+        if rest not in named_values:
+            out.emit(f"[{rest}] = _TAIL_CALL_UNKNOWN_OPCODE,\n")
+    out.emit("};\n")
     outfile.write("#endif /* _Py_TAIL_CALL_INTERP */\n")
 
 arg_parser = argparse.ArgumentParser(
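
The loop above emits a second, tracing variant of the tail-call dispatch table into the generated C: every defined opcode routes to the single recording handler, and undefined opcodes keep the error handler. Roughly, and with invented opcode numbers plus a simplified function-pointer type so the sketch compiles on its own, the emitted table has this shape:

/* Sketch of the generated tracing table. Table and handler names mirror the
 * generator's output strings above; everything else is illustrative. */
typedef int (*py_tail_call_funcptr)(void);

static int _TAIL_CALL_record_previous_inst(void) { return 0; }
static int _TAIL_CALL_UNKNOWN_OPCODE(void) { return -1; }

#define LOAD_FAST 85   /* illustrative opcode numbers, not CPython's */
#define CALL      53

static py_tail_call_funcptr instruction_funcptr_tracing_table[256] = {
    [LOAD_FAST] = _TAIL_CALL_record_previous_inst,
    [CALL] = _TAIL_CALL_record_previous_inst,
    [255] = _TAIL_CALL_UNKNOWN_OPCODE,
};
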
"".join(offset[1:]) + found = True + assert offset_str + offset_strs[f"_GUARD_IP_{name}"] = (name, offset_str) + return offset_strs + def generate_tier2( filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool ) -> None: @@ -179,7 +225,9 @@ def generate_tier2( ) out = CWriter(outfile, 2, lines) emitter = Tier2Emitter(out, analysis.labels) + offset_strs = populate_offset_strs(analysis) out.emit("\n") + for name, uop in analysis.uops.items(): if uop.properties.tier == 1: continue @@ -194,13 +242,15 @@ def generate_tier2( out.emit(f"case {uop.name}: {{\n") declare_variables(uop, out) stack = Stack() - stack = write_uop(uop, emitter, stack) + stack = write_uop(uop, emitter, stack, offset_strs) out.start_line() if not uop.properties.always_exits: out.emit("break;\n") out.start_line() out.emit("}") out.emit("\n\n") + + out.emit("\n") outfile.write("#undef TIER_TWO\n") diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 1cc23837a72dea..0e0396e5143348 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -23,13 +23,13 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: - out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n") + out.emit("extern const uint32_t _PyUop_Flags[MAX_UOP_ID+1];\n") out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n") out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n") out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n") out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") - out.emit("const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {\n") + out.emit("const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = {\n") for uop in analysis.uops.values(): if uop.is_viable() and uop.properties.tier != 1: out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n") diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 2f146014a1c26b..857e926d119900 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -55,13 +55,10 @@ do { \ __attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \ } while (0) -#undef GOTO_TIER_ONE -#define GOTO_TIER_ONE(TARGET) \ -do { \ - tstate->current_executor = NULL; \ - _PyFrame_SetStackPointer(frame, stack_pointer); \ - return TARGET; \ -} while (0) +#undef GOTO_TIER_ONE_SETUP +#define GOTO_TIER_ONE_SETUP \ + tstate->current_executor = NULL; \ + _PyFrame_SetStackPointer(frame, stack_pointer); #undef LOAD_IP #define LOAD_IP(UNUSED) \