From fe3c1af26e82dc9015950ed325089fe564187756 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:14:31 +0100 Subject: [PATCH 01/30] gh-137838: Move _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH] to _PyThreadStateImpl --- Include/internal/pycore_optimizer.h | 30 +---------------- Include/internal/pycore_tstate.h | 32 ++++++++++++++++++- ...-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst | 2 ++ Python/optimizer.c | 3 +- 4 files changed, 36 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 94d01999f68d9d..4c05c4c1df0ca9 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -11,6 +11,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" #include "pycore_stackref.h" // _PyStackRef +#include "pycore_tstate.h" // _PyUOpInstruction #include @@ -41,32 +42,6 @@ typedef struct { PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). } _PyVMData; -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:15; - uint16_t format:1; - uint16_t oparg; - union { - uint32_t target; - struct { - uint16_t jump_target; - uint16_t error_target; - }; - }; - uint64_t operand0; // A cache entry - uint64_t operand1; -#ifdef Py_STATS - uint64_t execution_count; -#endif -} _PyUOpInstruction; - typedef struct _PyExitData { uint32_t target; uint16_t index; @@ -118,9 +93,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // trace_run_counter is greater than this value. #define JIT_CLEANUP_THRESHOLD 100000 -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 - #define TRACE_STACK_SIZE 5 int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index bad968428c73a1..5291b70b8662c6 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -13,7 +13,6 @@ extern "C" { #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr - #ifdef Py_GIL_DISABLED struct _gc_thread_state { /* Thread-local allocation count. */ @@ -21,6 +20,36 @@ struct _gc_thread_state { }; #endif +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct _PyUOpInstruction{ + uint16_t opcode:15; + uint16_t format:1; + uint16_t oparg; + union { + uint32_t target; + struct { + uint16_t jump_target; + uint16_t error_target; + }; + }; + uint64_t operand0; // A cache entry + uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif +} _PyUOpInstruction; + +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 1200 + + // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields // are intended to be private. The _PyThreadStateImpl fields not exposed. @@ -75,6 +104,7 @@ typedef struct _PyThreadStateImpl { #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) Py_ssize_t reftotal; // this thread's total refcount operations #endif + struct _PyUOpInstruction buffer[1200]; } _PyThreadStateImpl; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst new file mode 100644 index 00000000000000..fdede2efd1ae01 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst @@ -0,0 +1,2 @@ +Move _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH] to _PyThreadStateImpl. +Patch By Donghee Na. diff --git a/Python/optimizer.c b/Python/optimizer.c index b82c790ffa9e69..6a895a907cbd80 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1280,7 +1280,8 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + _PyUOpInstruction *buffer = tstate->buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; From cd868b9eec9d4e8bd8cbbd413129bbd7dee998ea Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:16:29 +0100 Subject: [PATCH 02/30] nit --- Include/internal/pycore_tstate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 5291b70b8662c6..c14cc212ce4aa6 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -104,7 +104,10 @@ typedef struct _PyThreadStateImpl { #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) Py_ssize_t reftotal; // this thread's total refcount operations #endif - struct _PyUOpInstruction buffer[1200]; + +#ifdef _Py_TIER2 + struct _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; +#endif } _PyThreadStateImpl; From daf47305a0a5938ad6c217acb6f13e21db6590dc Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:18:41 +0100 Subject: [PATCH 03/30] nit --- Include/internal/pycore_tstate.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index c14cc212ce4aa6..9ee7c941890ba0 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -13,6 +13,7 @@ extern "C" { #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr + #ifdef Py_GIL_DISABLED struct _gc_thread_state { /* Thread-local allocation count. */ From e09af56ae2b059666af95a1765c4592989b0311b Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:43:10 +0100 Subject: [PATCH 04/30] Update --- .../2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst deleted file mode 100644 index fdede2efd1ae01..00000000000000 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-15-13-14-20.gh-issue-137838.Ju-vRW.rst +++ /dev/null @@ -1,2 +0,0 @@ -Move _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH] to _PyThreadStateImpl. -Patch By Donghee Na. From be0b25b0a7415660b441d74bdd5b70897880662f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:59:04 +0100 Subject: [PATCH 05/30] Address code review --- Include/internal/pycore_tstate.h | 30 +----------------------------- Makefile.pre.in | 1 + PCbuild/pythoncore.vcxproj.filters | 9 +++++++++ 3 files changed, 11 insertions(+), 29 deletions(-) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 9ee7c941890ba0..fd3a2928b8cab3 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -12,6 +12,7 @@ extern "C" { #include "pycore_freelist_state.h" // struct _Py_freelists #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr +#include "pycore_uop.h" // struct _PyUOpInstruction #ifdef Py_GIL_DISABLED @@ -21,35 +22,6 @@ struct _gc_thread_state { }; #endif -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct _PyUOpInstruction{ - uint16_t opcode:15; - uint16_t format:1; - uint16_t oparg; - union { - uint32_t target; - struct { - uint16_t jump_target; - uint16_t error_target; - }; - }; - uint64_t operand0; // A cache entry - uint64_t operand1; -#ifdef Py_STATS - uint64_t execution_count; -#endif -} _PyUOpInstruction; - -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 - // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields diff --git a/Makefile.pre.in b/Makefile.pre.in index 34bd4540efb0b8..f1fe50c88ebde6 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1435,6 +1435,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_uniqueid.h \ + $(srcdir)/Include/internal/pycore_uop.h \ $(srcdir)/Include/internal/pycore_uop_ids.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_warnings.h \ diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index e9eedfd1312fae..1868b222f18534 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -882,6 +882,15 @@ Include\internal + + Include\internal + + + Include\internal + + + Include\internal + Include\internal\mimalloc From 61b32eea977987c0ba051010160f2c7895fa8971 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 13:59:47 +0100 Subject: [PATCH 06/30] Add Include/internal/pycore_uop.h --- Include/internal/pycore_uop.h | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Include/internal/pycore_uop.h diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h new file mode 100644 index 00000000000000..79c2bd355e3700 --- /dev/null +++ b/Include/internal/pycore_uop.h @@ -0,0 +1,44 @@ +#ifndef Py_CORE_UOP_H +#define Py_CORE_UOP_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct _PyUOpInstruction{ + uint16_t opcode:15; + uint16_t format:1; + uint16_t oparg; + union { + uint32_t target; + struct { + uint16_t jump_target; + uint16_t error_target; + }; + }; + uint64_t operand0; // A cache entry + uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif +} _PyUOpInstruction; + +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 1200 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UOP_H */ From a39b4c1a5abe802c0234673bf51c79676ea5ed14 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 14:04:02 +0100 Subject: [PATCH 07/30] Update --- Include/internal/pycore_uop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h index 79c2bd355e3700..b569c46ccb926f 100644 --- a/Include/internal/pycore_uop.h +++ b/Include/internal/pycore_uop.h @@ -8,7 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include +#include /* Depending on the format, * the 32 bits between the oparg and operand are: * UOP_FORMAT_TARGET: From f32f7a70e7c3d063c4633f3c407dbd3053c3ef46 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 14:10:08 +0100 Subject: [PATCH 08/30] Update Include/internal/pycore_tstate.h --- Include/internal/pycore_tstate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index fd3a2928b8cab3..b4177261d694ee 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -22,7 +22,6 @@ struct _gc_thread_state { }; #endif - // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields // are intended to be private. The _PyThreadStateImpl fields not exposed. From fd15eb866ffa1ea5dd9236ebc264f14fcdf62812 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 15:37:07 +0100 Subject: [PATCH 09/30] nit --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 4c05c4c1df0ca9..188abab8c9625a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -11,7 +11,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" #include "pycore_stackref.h" // _PyStackRef -#include "pycore_tstate.h" // _PyUOpInstruction +#include "pycore_uop.h" // _PyUOpInstruction #include From 471832b1d3c6d9b8467d636f5b3adb505c9c303b Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 15 Sep 2025 15:37:36 +0100 Subject: [PATCH 10/30] nit --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 188abab8c9625a..c1a6b7abbf5de4 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -9,9 +9,9 @@ extern "C" { #endif #include "pycore_typedefs.h" // _PyInterpreterFrame +#include "pycore_uop.h" // _PyUOpInstruction #include "pycore_uop_ids.h" #include "pycore_stackref.h" // _PyStackRef -#include "pycore_uop.h" // _PyUOpInstruction #include From d12b70596db96107082bd411242834f077aff57a Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 11:58:02 +0100 Subject: [PATCH 11/30] Use lazy allocation --- Include/internal/pycore_tstate.h | 2 +- Python/optimizer.c | 7 +++++++ Python/pylifecycle.c | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index b4177261d694ee..25b6b16918041f 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -78,7 +78,7 @@ typedef struct _PyThreadStateImpl { #endif #ifdef _Py_TIER2 - struct _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + struct _PyUOpInstruction *buffer; #endif } _PyThreadStateImpl; diff --git a/Python/optimizer.c b/Python/optimizer.c index 6a895a907cbd80..16874373931371 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1281,6 +1281,13 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + if (tstate->buffer == NULL) { + tstate->buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); + if (tstate->buffer == NULL) { + PyErr_NoMemory(); + return -1; + } + } _PyUOpInstruction *buffer = tstate->buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 51a777077d8255..eab92e8b80901d 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1704,6 +1704,11 @@ finalize_modules(PyThreadState *tstate) interp->jit = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + if (_tstate->buffer != NULL) { + PyMem_RawFree(_tstate->buffer); + _tstate->buffer = NULL; + } #endif // Stop watching __builtin__ modifications From c628ffc5efe2def222e097614dddd27fbe1fa5ad Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 13:51:34 +0100 Subject: [PATCH 12/30] Address code review --- Python/pylifecycle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index eab92e8b80901d..a13bd532b91148 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -863,6 +863,10 @@ pycore_interp_init(PyThreadState *tstate) if (_tstate->c_stack_hard_limit == 0) { _Py_InitializeRecursionLimits(tstate); } +#ifdef _Py_TIER2 + // Ensure the buffer is to be set as NULL for MSVC + _tstate->buffer = NULL; +#endif PyInterpreterState *interp = tstate->interp; PyStatus status; PyObject *sysmod = NULL; From b17541646c404e52ac87e61c47f60ae3d1c65fa8 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 14:04:49 +0100 Subject: [PATCH 13/30] Address code review --- Include/internal/pycore_tstate.h | 2 +- Python/optimizer.c | 9 ++++----- Python/pylifecycle.c | 9 --------- Python/pystate.c | 13 +++++++++++++ 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 25b6b16918041f..2ad02b5be6999f 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -78,7 +78,7 @@ typedef struct _PyThreadStateImpl { #endif #ifdef _Py_TIER2 - struct _PyUOpInstruction *buffer; + struct _PyUOpInstruction *jit_uop_buffer; #endif } _PyThreadStateImpl; diff --git a/Python/optimizer.c b/Python/optimizer.c index 16874373931371..f902f696545708 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1281,14 +1281,13 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - if (tstate->buffer == NULL) { - tstate->buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); - if (tstate->buffer == NULL) { - PyErr_NoMemory(); + if (tstate->jit_uop_buffer == NULL) { + tstate->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); + if (tstate->jit_uop_buffer == NULL) { return -1; } } - _PyUOpInstruction *buffer = tstate->buffer; + _PyUOpInstruction *buffer = tstate->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index a13bd532b91148..51a777077d8255 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -863,10 +863,6 @@ pycore_interp_init(PyThreadState *tstate) if (_tstate->c_stack_hard_limit == 0) { _Py_InitializeRecursionLimits(tstate); } -#ifdef _Py_TIER2 - // Ensure the buffer is to be set as NULL for MSVC - _tstate->buffer = NULL; -#endif PyInterpreterState *interp = tstate->interp; PyStatus status; PyObject *sysmod = NULL; @@ -1708,11 +1704,6 @@ finalize_modules(PyThreadState *tstate) interp->jit = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (_tstate->buffer != NULL) { - PyMem_RawFree(_tstate->buffer); - _tstate->buffer = NULL; - } #endif // Stop watching __builtin__ modifications diff --git a/Python/pystate.c b/Python/pystate.c index 2465d8667472dc..4308ff844b05b8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1536,6 +1536,11 @@ new_threadstate(PyInterpreterState *interp, int whence) } #endif +#ifdef _Py_TIER2 + // Ensure the buffer is to be set as NULL for MSVC + tstate->jit_uop_buffer = NULL; +#endif + /* We serialize concurrent creation to protect global state. */ HEAD_LOCK(interp->runtime); @@ -1726,6 +1731,14 @@ PyThreadState_Clear(PyThreadState *tstate) _Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate); #endif +#ifdef _Py_TIER2 + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + if (_tstate->jit_uop_buffer != NULL) { + PyMem_RawFree(_tstate->jit_uop_buffer); + _tstate->jit_uop_buffer = NULL; + } +#endif + // Merge our queue of pointers to be freed into the interpreter queue. _PyMem_AbandonDelayed(tstate); From ad15aaf4c2cc3468705109b66202b85cd30f7984 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 14:08:41 +0100 Subject: [PATCH 14/30] Address code review --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 4308ff844b05b8..8680156a21e59b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1537,7 +1537,7 @@ new_threadstate(PyInterpreterState *interp, int whence) #endif #ifdef _Py_TIER2 - // Ensure the buffer is to be set as NULL for MSVC + // Ensure the buffer is to be set as NULL. tstate->jit_uop_buffer = NULL; #endif From 07e6ba166565289538efc6fbef7f1adc46c48efa Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 14:09:54 +0100 Subject: [PATCH 15/30] Address code review --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index f902f696545708..aec68bbca5398e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1284,7 +1284,7 @@ uop_optimize( if (tstate->jit_uop_buffer == NULL) { tstate->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); if (tstate->jit_uop_buffer == NULL) { - return -1; + return 0; } } _PyUOpInstruction *buffer = tstate->jit_uop_buffer; From f13260e806b1771f6cf56e09519dd70a105498bc Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 22:40:11 +0100 Subject: [PATCH 16/30] Address code review --- Include/internal/pycore_interp_structs.h | 5 +++++ Include/internal/pycore_tstate.h | 5 ----- Python/optimizer.c | 10 +++++----- Python/pystate.c | 22 +++++++++------------- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index fa9568ab4d0e85..e6b11c3224a2b8 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_structs.h" // PyHamtObject #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_typedefs.h" // _PyRuntimeState +#include "pycore_uop.h" // struct _PyUOpInstruction #define CODE_MAX_WATCHERS 8 @@ -898,6 +899,10 @@ struct _is { struct _stoptheworld_state stoptheworld; struct _qsbr_shared qsbr; +#ifdef _Py_TIER2 + struct _PyUOpInstruction *jit_uop_buffer; +#endif + #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 2ad02b5be6999f..bad968428c73a1 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -12,7 +12,6 @@ extern "C" { #include "pycore_freelist_state.h" // struct _Py_freelists #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr -#include "pycore_uop.h" // struct _PyUOpInstruction #ifdef Py_GIL_DISABLED @@ -77,10 +76,6 @@ typedef struct _PyThreadStateImpl { Py_ssize_t reftotal; // this thread's total refcount operations #endif -#ifdef _Py_TIER2 - struct _PyUOpInstruction *jit_uop_buffer; -#endif - } _PyThreadStateImpl; #ifdef __cplusplus diff --git a/Python/optimizer.c b/Python/optimizer.c index aec68bbca5398e..259cdb0e698af4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1280,14 +1280,14 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - if (tstate->jit_uop_buffer == NULL) { - tstate->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); - if (tstate->jit_uop_buffer == NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->jit_uop_buffer == NULL) { + interp->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); + if (interp->jit_uop_buffer == NULL) { return 0; } } - _PyUOpInstruction *buffer = tstate->jit_uop_buffer; + _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; diff --git a/Python/pystate.c b/Python/pystate.c index 8680156a21e59b..f45087824c1b3d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -556,6 +556,11 @@ init_interpreter(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif + +#ifdef _Py_TIER2 + // Ensure the buffer is to be set as NULL. + interp->jit_uop_buffer = NULL; +#endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); interp->asyncio_tasks_lock = (PyMutex){0}; @@ -803,6 +808,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); + if (interp->jit_uop_buffer != NULL) { + PyMem_RawFree(interp->jit_uop_buffer); + interp->jit_uop_buffer = NULL; + } #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); @@ -1536,11 +1545,6 @@ new_threadstate(PyInterpreterState *interp, int whence) } #endif -#ifdef _Py_TIER2 - // Ensure the buffer is to be set as NULL. - tstate->jit_uop_buffer = NULL; -#endif - /* We serialize concurrent creation to protect global state. */ HEAD_LOCK(interp->runtime); @@ -1731,14 +1735,6 @@ PyThreadState_Clear(PyThreadState *tstate) _Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate); #endif -#ifdef _Py_TIER2 - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (_tstate->jit_uop_buffer != NULL) { - PyMem_RawFree(_tstate->jit_uop_buffer); - _tstate->jit_uop_buffer = NULL; - } -#endif - // Merge our queue of pointers to be freed into the interpreter queue. _PyMem_AbandonDelayed(tstate); From ab39a55e57981dd1fe6ad2454257add987d1dcaa Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 23:08:36 +0100 Subject: [PATCH 17/30] test --- Python/pystate.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index f45087824c1b3d..23307cbc23e04d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -556,11 +556,6 @@ init_interpreter(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif - -#ifdef _Py_TIER2 - // Ensure the buffer is to be set as NULL. - interp->jit_uop_buffer = NULL; -#endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); interp->asyncio_tasks_lock = (PyMutex){0}; From 60541dc982ee99830ed4aa16fe857143e419bd35 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 23:27:55 +0100 Subject: [PATCH 18/30] Revert "test" This reverts commit ab39a55e57981dd1fe6ad2454257add987d1dcaa. --- Python/pystate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/pystate.c b/Python/pystate.c index 23307cbc23e04d..f45087824c1b3d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -556,6 +556,11 @@ init_interpreter(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif + +#ifdef _Py_TIER2 + // Ensure the buffer is to be set as NULL. + interp->jit_uop_buffer = NULL; +#endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); interp->asyncio_tasks_lock = (PyMutex){0}; From 877f3a90efad8641a253f3b81ef597f8486efe14 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 23:28:20 +0100 Subject: [PATCH 19/30] Just for test --- Python/pystate.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index f45087824c1b3d..85c0a421bb1595 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -808,10 +808,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); - if (interp->jit_uop_buffer != NULL) { - PyMem_RawFree(interp->jit_uop_buffer); - interp->jit_uop_buffer = NULL; - } #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); From 9af48c2d66324394275072f541d85b2446e39d4c Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Sep 2025 23:48:03 +0100 Subject: [PATCH 20/30] Revert "Just for test" This reverts commit 877f3a90efad8641a253f3b81ef597f8486efe14. --- Python/pystate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/pystate.c b/Python/pystate.c index 85c0a421bb1595..f45087824c1b3d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -808,6 +808,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); + if (interp->jit_uop_buffer != NULL) { + PyMem_RawFree(interp->jit_uop_buffer); + interp->jit_uop_buffer = NULL; + } #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); From 989fa8e2cada9417fa7f41041f3e234ecfdbb0eb Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 06:40:01 +0100 Subject: [PATCH 21/30] Test statically allocated --- Include/internal/pycore_interp_structs.h | 2 +- Python/optimizer.c | 2 ++ Python/pystate.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index e6b11c3224a2b8..86c906bd21b9e4 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -900,7 +900,7 @@ struct _is { struct _qsbr_shared qsbr; #ifdef _Py_TIER2 - struct _PyUOpInstruction *jit_uop_buffer; + struct _PyUOpInstruction jit_uop_buffer[UOP_MAX_TRACE_LENGTH]; #endif #if defined(Py_GIL_DISABLED) diff --git a/Python/optimizer.c b/Python/optimizer.c index 259cdb0e698af4..3c04b0c8fe422e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1281,12 +1281,14 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); PyInterpreterState *interp = _PyInterpreterState_GET(); + /** if (interp->jit_uop_buffer == NULL) { interp->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); if (interp->jit_uop_buffer == NULL) { return 0; } } + **/ _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); diff --git a/Python/pystate.c b/Python/pystate.c index f45087824c1b3d..b1f729a339a752 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -559,7 +559,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 // Ensure the buffer is to be set as NULL. - interp->jit_uop_buffer = NULL; + // interp->jit_uop_buffer = NULL; #endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); @@ -808,10 +808,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); + /** if (interp->jit_uop_buffer != NULL) { PyMem_RawFree(interp->jit_uop_buffer); interp->jit_uop_buffer = NULL; } + **/ #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); From 4dff8168a23f4ae7355147fd540ffd5b2ea81f7f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 10:16:11 +0100 Subject: [PATCH 22/30] Revert "Test statically allocated" This reverts commit 989fa8e2cada9417fa7f41041f3e234ecfdbb0eb. --- Include/internal/pycore_interp_structs.h | 2 +- Python/optimizer.c | 2 -- Python/pystate.c | 4 +--- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 86c906bd21b9e4..e6b11c3224a2b8 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -900,7 +900,7 @@ struct _is { struct _qsbr_shared qsbr; #ifdef _Py_TIER2 - struct _PyUOpInstruction jit_uop_buffer[UOP_MAX_TRACE_LENGTH]; + struct _PyUOpInstruction *jit_uop_buffer; #endif #if defined(Py_GIL_DISABLED) diff --git a/Python/optimizer.c b/Python/optimizer.c index 3c04b0c8fe422e..259cdb0e698af4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1281,14 +1281,12 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); PyInterpreterState *interp = _PyInterpreterState_GET(); - /** if (interp->jit_uop_buffer == NULL) { interp->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); if (interp->jit_uop_buffer == NULL) { return 0; } } - **/ _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); diff --git a/Python/pystate.c b/Python/pystate.c index b1f729a339a752..f45087824c1b3d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -559,7 +559,7 @@ init_interpreter(PyInterpreterState *interp, #ifdef _Py_TIER2 // Ensure the buffer is to be set as NULL. - // interp->jit_uop_buffer = NULL; + interp->jit_uop_buffer = NULL; #endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); @@ -808,12 +808,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); - /** if (interp->jit_uop_buffer != NULL) { PyMem_RawFree(interp->jit_uop_buffer); interp->jit_uop_buffer = NULL; } - **/ #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); From 7e5261ce8970c4dc855c57b2f13e1f93016fb698 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 10:17:26 +0100 Subject: [PATCH 23/30] Make situation to isolate with the change --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 259cdb0e698af4..e4849a09883763 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1287,7 +1287,7 @@ uop_optimize( return 0; } } - _PyUOpInstruction *buffer = interp->jit_uop_buffer; + _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; From 08adf263a5f3c011cd022b7b5227ec4071d15d7d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 11:23:25 +0100 Subject: [PATCH 24/30] fix --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index e4849a09883763..259cdb0e698af4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1287,7 +1287,7 @@ uop_optimize( return 0; } } - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; From 9051791f16e93357d4b4a4b6e9b74819591f89d5 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 11:23:34 +0100 Subject: [PATCH 25/30] fix --- Include/internal/pycore_interp_structs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index e6b11c3224a2b8..03e9c5807ea6f8 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -899,9 +899,7 @@ struct _is { struct _stoptheworld_state stoptheworld; struct _qsbr_shared qsbr; -#ifdef _Py_TIER2 struct _PyUOpInstruction *jit_uop_buffer; -#endif #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; From 928485ac0f40696ffd36aacf5a967f3f746a59de Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 13:36:50 +0100 Subject: [PATCH 26/30] Address code review --- Include/internal/pycore_uop.h | 1 + Python/optimizer.c | 2 +- Python/pystate.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h index b569c46ccb926f..4abefd3b95d21a 100644 --- a/Include/internal/pycore_uop.h +++ b/Include/internal/pycore_uop.h @@ -37,6 +37,7 @@ typedef struct _PyUOpInstruction{ // This is the length of the trace we project initially. #define UOP_MAX_TRACE_LENGTH 1200 +#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) #ifdef __cplusplus } diff --git a/Python/optimizer.c b/Python/optimizer.c index 259cdb0e698af4..e1b7a78a240ea8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1282,7 +1282,7 @@ uop_optimize( _Py_BloomFilter_Init(&dependencies); PyInterpreterState *interp = _PyInterpreterState_GET(); if (interp->jit_uop_buffer == NULL) { - interp->jit_uop_buffer = (_PyUOpInstruction *)PyMem_RawMalloc(UOP_MAX_TRACE_LENGTH*sizeof(_PyUOpInstruction)); + interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE); if (interp->jit_uop_buffer == NULL) { return 0; } diff --git a/Python/pystate.c b/Python/pystate.c index f45087824c1b3d..d0094aba04ca4a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -23,6 +23,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_time.h" // _PyTime_Init() +#include "pycore_uop.h". // UOP_BUFFER_SIZE #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() @@ -809,7 +810,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); if (interp->jit_uop_buffer != NULL) { - PyMem_RawFree(interp->jit_uop_buffer); + _PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE); interp->jit_uop_buffer = NULL; } #endif From 0c2a74f920315ef8ca457cc96a97ff9d23ade353 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 13:43:32 +0100 Subject: [PATCH 27/30] nit --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index d0094aba04ca4a..9d771d2687bb0e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -23,7 +23,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_time.h" // _PyTime_Init() -#include "pycore_uop.h". // UOP_BUFFER_SIZE +#include "pycore_uop.h" // UOP_BUFFER_SIZE #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() From a49bcab0ad92be69fee5244cdbdaf22600b875c8 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 13:58:46 +0100 Subject: [PATCH 28/30] Update --- Include/internal/pycore_interp_structs.h | 4 ++-- Python/bytecodes.c | 2 ++ Python/generated_cases.c.h | 2 ++ Python/optimizer.c | 18 +++++++++++++----- Python/pystate.c | 1 + 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 03e9c5807ea6f8..4c55770e01da4f 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -899,8 +899,6 @@ struct _is { struct _stoptheworld_state stoptheworld; struct _qsbr_shared qsbr; - struct _PyUOpInstruction *jit_uop_buffer; - #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state @@ -952,6 +950,8 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; + bool compiling; + struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6c3609d293890f..2f1d240f11d6de 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2961,7 +2961,9 @@ dummy_func( start--; } _PyExecutorObject *executor; + assert(!_PyInterpreterState_GET()->compiling); int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); + assert(!_PyInterpreterState_GET()->compiling); if (optimized <= 0) { this_instr[1].counter = restart_backoff_counter(counter); ERROR_IF(optimized < 0); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c1f6f5c85cdd88..048bd8273d7559 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7651,9 +7651,11 @@ start--; } _PyExecutorObject *executor; + assert(!_PyInterpreterState_GET()->compiling); _PyFrame_SetStackPointer(frame, stack_pointer); int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); stack_pointer = _PyFrame_GetStackPointer(frame); + assert(!_PyInterpreterState_GET()->compiling); if (optimized <= 0) { this_instr[1].counter = restart_backoff_counter(counter); if (optimized < 0) { diff --git a/Python/optimizer.c b/Python/optimizer.c index e1b7a78a240ea8..021e257dcbd84f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -116,7 +116,10 @@ _PyOptimizer_Optimize( _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - assert(_PyInterpreterState_GET()->jit); + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->jit); + assert(!interp->compiling); + interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. We can only insert the executor into the bytecode if @@ -125,12 +128,14 @@ _PyOptimizer_Optimize( bool progress_needed = chain_depth == 0; PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); + int ret = 0; if (progress_needed && !has_space_for_executor(code, start)) { - return 0; + goto end; } int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { - return err; + ret = err; + goto end; } assert(*executor_ptr != NULL); if (progress_needed) { @@ -143,7 +148,7 @@ _PyOptimizer_Optimize( * it might get confused by the executor disappearing, * but there is not much we can do about that here. */ Py_DECREF(*executor_ptr); - return 0; + goto end; } insert_executor(code, start, index, *executor_ptr); } @@ -152,7 +157,9 @@ _PyOptimizer_Optimize( } (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); - return 1; +end: + interp->compiling = false; + return ret; } static _PyExecutorObject * @@ -1281,6 +1288,7 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->compiling); if (interp->jit_uop_buffer == NULL) { interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE); if (interp->jit_uop_buffer == NULL) { diff --git a/Python/pystate.c b/Python/pystate.c index 9d771d2687bb0e..5305fd47791ec4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -577,6 +577,7 @@ init_interpreter(PyInterpreterState *interp, } interp->_code_object_generation = 0; interp->jit = false; + interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0; From f88ee3be9b110195e7364c09f591fdfdc0f1136e Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 14:32:50 +0100 Subject: [PATCH 29/30] Revert "Update" This reverts commit a49bcab0ad92be69fee5244cdbdaf22600b875c8. --- Include/internal/pycore_interp_structs.h | 4 ++-- Python/bytecodes.c | 2 -- Python/generated_cases.c.h | 2 -- Python/optimizer.c | 18 +++++------------- Python/pystate.c | 1 - 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 4c55770e01da4f..03e9c5807ea6f8 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -899,6 +899,8 @@ struct _is { struct _stoptheworld_state stoptheworld; struct _qsbr_shared qsbr; + struct _PyUOpInstruction *jit_uop_buffer; + #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state @@ -950,8 +952,6 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; - bool compiling; - struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2f1d240f11d6de..6c3609d293890f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2961,9 +2961,7 @@ dummy_func( start--; } _PyExecutorObject *executor; - assert(!_PyInterpreterState_GET()->compiling); int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); - assert(!_PyInterpreterState_GET()->compiling); if (optimized <= 0) { this_instr[1].counter = restart_backoff_counter(counter); ERROR_IF(optimized < 0); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 048bd8273d7559..c1f6f5c85cdd88 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7651,11 +7651,9 @@ start--; } _PyExecutorObject *executor; - assert(!_PyInterpreterState_GET()->compiling); _PyFrame_SetStackPointer(frame, stack_pointer); int optimized = _PyOptimizer_Optimize(frame, start, &executor, 0); stack_pointer = _PyFrame_GetStackPointer(frame); - assert(!_PyInterpreterState_GET()->compiling); if (optimized <= 0) { this_instr[1].counter = restart_backoff_counter(counter); if (optimized < 0) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 021e257dcbd84f..e1b7a78a240ea8 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -116,10 +116,7 @@ _PyOptimizer_Optimize( _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(interp->jit); - assert(!interp->compiling); - interp->compiling = true; + assert(_PyInterpreterState_GET()->jit); // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. We can only insert the executor into the bytecode if @@ -128,14 +125,12 @@ _PyOptimizer_Optimize( bool progress_needed = chain_depth == 0; PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); - int ret = 0; if (progress_needed && !has_space_for_executor(code, start)) { - goto end; + return 0; } int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { - ret = err; - goto end; + return err; } assert(*executor_ptr != NULL); if (progress_needed) { @@ -148,7 +143,7 @@ _PyOptimizer_Optimize( * it might get confused by the executor disappearing, * but there is not much we can do about that here. */ Py_DECREF(*executor_ptr); - goto end; + return 0; } insert_executor(code, start, index, *executor_ptr); } @@ -157,9 +152,7 @@ _PyOptimizer_Optimize( } (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); -end: - interp->compiling = false; - return ret; + return 1; } static _PyExecutorObject * @@ -1288,7 +1281,6 @@ uop_optimize( _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(interp->compiling); if (interp->jit_uop_buffer == NULL) { interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE); if (interp->jit_uop_buffer == NULL) { diff --git a/Python/pystate.c b/Python/pystate.c index 5305fd47791ec4..9d771d2687bb0e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -577,7 +577,6 @@ init_interpreter(PyInterpreterState *interp, } interp->_code_object_generation = 0; interp->jit = false; - interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0; From 05825f57e68f3d6f4c677dc2ca0b473397f15e7c Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 15:02:02 +0100 Subject: [PATCH 30/30] fix --- Include/internal/pycore_interp_structs.h | 4 ++-- Python/optimizer.c | 9 ++++++++- Python/pylifecycle.c | 1 + Python/pystate.c | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 03e9c5807ea6f8..4c55770e01da4f 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -899,8 +899,6 @@ struct _is { struct _stoptheworld_state stoptheworld; struct _qsbr_shared qsbr; - struct _PyUOpInstruction *jit_uop_buffer; - #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state @@ -952,6 +950,8 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; + bool compiling; + struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; diff --git a/Python/optimizer.c b/Python/optimizer.c index e1b7a78a240ea8..6f27b521018c8c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -116,7 +116,10 @@ _PyOptimizer_Optimize( _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - assert(_PyInterpreterState_GET()->jit); + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->jit); + assert(!interp->compiling); + interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains. We can only insert the executor into the bytecode if @@ -126,10 +129,12 @@ _PyOptimizer_Optimize( PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); if (progress_needed && !has_space_for_executor(code, start)) { + interp->compiling = false; return 0; } int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { + interp->compiling = false; return err; } assert(*executor_ptr != NULL); @@ -143,6 +148,7 @@ _PyOptimizer_Optimize( * it might get confused by the executor disappearing, * but there is not much we can do about that here. */ Py_DECREF(*executor_ptr); + interp->compiling = false; return 0; } insert_executor(code, start, index, *executor_ptr); @@ -152,6 +158,7 @@ _PyOptimizer_Optimize( } (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); + interp->compiling = false; return 1; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 51a777077d8255..20d985f59e72eb 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1702,6 +1702,7 @@ finalize_modules(PyThreadState *tstate) // Invalidate all executors and turn off JIT: interp->jit = false; + interp->compiling = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); #endif diff --git a/Python/pystate.c b/Python/pystate.c index 9d771d2687bb0e..5305fd47791ec4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -577,6 +577,7 @@ init_interpreter(PyInterpreterState *interp, } interp->_code_object_generation = 0; interp->jit = false; + interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0;