diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index fa9568ab4d0e85..4c55770e01da4f 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_structs.h" // PyHamtObject #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_typedefs.h" // _PyRuntimeState +#include "pycore_uop.h" // struct _PyUOpInstruction #define CODE_MAX_WATCHERS 8 @@ -949,6 +950,8 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; + bool compiling; + struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 94d01999f68d9d..c1a6b7abbf5de4 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_typedefs.h" // _PyInterpreterFrame +#include "pycore_uop.h" // _PyUOpInstruction #include "pycore_uop_ids.h" #include "pycore_stackref.h" // _PyStackRef #include @@ -41,32 +42,6 @@ typedef struct { PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). 
} _PyVMData; -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:15; - uint16_t format:1; - uint16_t oparg; - union { - uint32_t target; - struct { - uint16_t jump_target; - uint16_t error_target; - }; - }; - uint64_t operand0; // A cache entry - uint64_t operand1; -#ifdef Py_STATS - uint64_t execution_count; -#endif -} _PyUOpInstruction; - typedef struct _PyExitData { uint32_t target; uint16_t index; @@ -118,9 +93,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // trace_run_counter is greater than this value. #define JIT_CLEANUP_THRESHOLD 100000 -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 - #define TRACE_STACK_SIZE 5 int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h new file mode 100644 index 00000000000000..4abefd3b95d21a --- /dev/null +++ b/Include/internal/pycore_uop.h @@ -0,0 +1,45 @@ +#ifndef Py_CORE_UOP_H +#define Py_CORE_UOP_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct _PyUOpInstruction{ + uint16_t opcode:15; + uint16_t format:1; + uint16_t oparg; + union { + uint32_t target; + struct { + uint16_t jump_target; + uint16_t error_target; + }; + }; + uint64_t operand0; // A cache entry + uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif +} _PyUOpInstruction; + +// This is the length of the trace we project initially. 
+#define UOP_MAX_TRACE_LENGTH 1200 +#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CORE_UOP_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 34bd4540efb0b8..f1fe50c88ebde6 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1435,6 +1435,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_uniqueid.h \ + $(srcdir)/Include/internal/pycore_uop.h \ $(srcdir)/Include/internal/pycore_uop_ids.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_warnings.h \ diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index e9eedfd1312fae..1868b222f18534 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -882,6 +882,15 @@ Include\internal + + Include\internal + + + Include\internal + + + Include\internal + Include\internal\mimalloc diff --git a/Python/optimizer.c b/Python/optimizer.c index b82c790ffa9e69..6f27b521018c8c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -116,7 +116,10 @@ _PyOptimizer_Optimize( _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - assert(_PyInterpreterState_GET()->jit); + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->jit); + assert(!interp->compiling); + interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. 
We can only insert the executor into the bytecode if @@ -126,10 +129,12 @@ _PyOptimizer_Optimize( PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); if (progress_needed && !has_space_for_executor(code, start)) { + interp->compiling = false; return 0; } int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { + interp->compiling = false; return err; } assert(*executor_ptr != NULL); @@ -143,6 +148,7 @@ _PyOptimizer_Optimize( * it might get confused by the executor disappearing, * but there is not much we can do about that here. */ Py_DECREF(*executor_ptr); + interp->compiling = false; return 0; } insert_executor(code, start, index, *executor_ptr); @@ -152,6 +158,7 @@ _PyOptimizer_Optimize( } (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); + interp->compiling = false; return 1; } @@ -1280,7 +1287,14 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->jit_uop_buffer == NULL) { + interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE); + if (interp->jit_uop_buffer == NULL) { + return 0; + } + } + _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 51a777077d8255..20d985f59e72eb 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1702,6 +1702,7 @@ finalize_modules(PyThreadState *tstate) // Invalidate all executors and turn off JIT: interp->jit = false; + interp->compiling = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); #endif diff --git a/Python/pystate.c b/Python/pystate.c index 2465d8667472dc..5305fd47791ec4 100644 --- a/Python/pystate.c +++ 
b/Python/pystate.c @@ -23,6 +23,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_time.h" // _PyTime_Init() +#include "pycore_uop.h" // UOP_BUFFER_SIZE #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() @@ -556,6 +557,11 @@ init_interpreter(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif + +#ifdef _Py_TIER2 + // The JIT uop buffer starts out unallocated (NULL). + interp->jit_uop_buffer = NULL; +#endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); interp->asyncio_tasks_lock = (PyMutex){0}; @@ -571,6 +577,7 @@ init_interpreter(PyInterpreterState *interp, } interp->_code_object_generation = 0; interp->jit = false; + interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0; @@ -803,6 +810,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); + if (interp->jit_uop_buffer != NULL) { + _PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE); + interp->jit_uop_buffer = NULL; + } #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp);