diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ccf85e627f9b5f..ae16bc6990c170 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1353,6 +1353,32 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. versionadded:: 3.11 +.. c:function:: int PyUnstable_ThreadState_SetStack(PyThreadState *tstate, void *stack_start_addr, size_t stack_size) + + Set the stack start address and stack size of a Python thread state. + + *stack_size* is the stack size in bytes and must be greater than ``0``. + + On success, return ``0``. + On failure, set an exception and return ``-1``. + + .. seealso:: + The :c:func:`PyUnstable_ThreadState_ResetStack` function. + + .. versionadded:: next + + +.. c:function:: void PyUnstable_ThreadState_ResetStack(PyThreadState *tstate) + + Reset the stack start address and stack size of a Python thread state to + the operating system defaults. + + .. seealso:: + The :c:func:`PyUnstable_ThreadState_SetStack` function. + + .. versionadded:: next + + .. c:function:: PyInterpreterState* PyInterpreterState_Get(void) Get the current interpreter. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index c226f57e5025f0..2582673f0c5c08 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -2981,6 +2981,11 @@ New features in the C API as arguments to C API functions. (Contributed by Sam Gross in :gh:`133164`.) +* Add :c:func:`PyUnstable_ThreadState_SetStack` and + :c:func:`PyUnstable_ThreadState_ResetStack` functions to set the stack start + address and stack size of a Python thread state. + (Contributed by Victor Stinner in :gh:`139653`.) 
+ Limited C API changes --------------------- diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index ac8798ff6129a0..62a2fdda52ca75 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -252,6 +252,16 @@ PyAPI_FUNC(int) PyGILState_Check(void); */ PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void); +// Set the stack start address and stack size of a Python thread state +PyAPI_FUNC(int) PyUnstable_ThreadState_SetStack( + PyThreadState *tstate, + void *stack_start_addr, // Stack start address + size_t stack_size); // Stack size (in bytes) + +// Reset the stack start address and stack size of a Python thread state +PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStack( + PyThreadState *tstate); + /* Routines for advanced debuggers, requested by David Beazley. Don't use unless you know what you are doing! */ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void); diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 102a378f8f08bc..908d8f5d8e929b 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -243,8 +243,6 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) { (void)tstate; } -PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate); - static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; diff --git a/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst b/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst new file mode 100644 index 00000000000000..f50a976574038f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-10-06-22-17-47.gh-issue-139653.6-1MOd.rst @@ -0,0 +1,3 @@ +Add :c:func:`PyUnstable_ThreadState_SetStack` and +:c:func:`PyUnstable_ThreadState_ResetStack` functions to set the stack start +address and stack size of a Python thread state. 
Patch by Victor Stinner.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index c2647d405e25bc..979e098a20a08c 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2418,6 +2418,55 @@ set_vectorcall_nop(PyObject *self, PyObject *func)
     Py_RETURN_NONE;
 }
 
+// Call PyUnstable_ThreadState_SetStack(tstate, start, size), which must
+// succeed, then check the limits stored in the thread state: the hard limit
+// is 'start', the top is 'start + size', hard limit <= soft limit < top.
+// NOTE(review): the call is made inside assert(), so it is skipped if
+// NDEBUG is defined -- confirm that this file always enables assertions.
+static void
+check_threadstate_set_stack(PyThreadState *tstate, void *start, size_t size)
+{
+    assert(PyUnstable_ThreadState_SetStack(tstate, start, size) == 0);
+    assert(!PyErr_Occurred());
+
+    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
+    assert(ts->c_stack_hard_limit == (uintptr_t)start);
+    assert(ts->c_stack_top == (uintptr_t)start + size);
+    assert(ts->c_stack_soft_limit >= ts->c_stack_hard_limit);
+    assert(ts->c_stack_soft_limit < ts->c_stack_top);
+}
+
+
+// Test PyUnstable_ThreadState_SetStack() and
+// PyUnstable_ThreadState_ResetStack() on the current thread state.
+static PyObject *
+test_threadstate_set_stack(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    PyThreadState *tstate = PyThreadState_GET();
+    assert(!PyErr_Occurred());
+
+    // Test a size smaller than _PyOS_STACK_MARGIN_BYTES
+    size_t size = 4096;
+    assert(size < _PyOS_STACK_MARGIN_BYTES);
+    void *start = (void*)(_Py_get_machine_stack_pointer() - size);
+    check_threadstate_set_stack(tstate, start, size);
+
+    // Test a larger size
+    size = 7654321;
+    start = (void*)(_Py_get_machine_stack_pointer() - size);
+    check_threadstate_set_stack(tstate, start, size);
+
+    // Restore the default stack limits of the thread state
+    PyUnstable_ThreadState_ResetStack(tstate);
+
+    // Test the error path: stack_size=0 must fail with ValueError
+    assert(PyUnstable_ThreadState_SetStack(tstate, start, 0) == -1);
+    assert(PyErr_ExceptionMatches(PyExc_ValueError));
+    PyErr_Clear();
+
+    Py_RETURN_NONE;
+}
+
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -2527,6 +2576,7 @@ static PyMethodDef module_functions[] = {
 #endif
     {"simple_pending_call", simple_pending_call, METH_O},
     {"set_vectorcall_nop", set_vectorcall_nop, METH_O},
+    {"test_threadstate_set_stack", test_threadstate_set_stack, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Python/ceval.c b/Python/ceval.c
index 0ccaacaf3ed5b1..5393893650f9d9 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -351,7 +351,7 @@
_Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
         return 0;
     }
     if (_tstate->c_stack_hard_limit == 0) {
-        _Py_InitializeRecursionLimits(tstate);
+        PyUnstable_ThreadState_ResetStack(tstate);
     }
     return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
 }
@@ -439,35 +439,98 @@ int pthread_attr_destroy(pthread_attr_t *a)
 #endif
 
 
+// Set the C stack limits of a thread state from a stack memory region.
+//
+// The region is [stack_start_addr, stack_start_addr + stack_size):
+// c_stack_hard_limit is its lowest address and c_stack_top is its end.
+// c_stack_soft_limit is the hard limit plus a safety margin, but only if
+// the region is strictly larger than the margin; otherwise the soft limit
+// equals the hard limit.
+static void
+tstate_set_stack(PyThreadState *tstate,
+                 void *stack_start_addr, size_t stack_size)
+{
+    assert(stack_size > 0);
+
+    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)tstate;
+    ts->c_stack_hard_limit = (uintptr_t)stack_start_addr;
+    ts->c_stack_top = (uintptr_t)stack_start_addr + stack_size;
+
+    // Only reserve a margin if the stack is strictly larger than it: with
+    // stack_size == _PyOS_STACK_MARGIN_BYTES, adding the margin would make
+    // the soft limit equal to c_stack_top and break the invariant below.
+    uintptr_t soft_limit = ts->c_stack_hard_limit;
+    if (stack_size > _PyOS_STACK_MARGIN_BYTES) {
+#ifdef _Py_THREAD_SANITIZER
+        // Thread sanitizer crashes if we use a bit more than half the stack.
+        soft_limit += (stack_size / 2);
+#else
+        soft_limit += _PyOS_STACK_MARGIN_BYTES;
+#endif
+    }
+    ts->c_stack_soft_limit = soft_limit;
+
+    // Sanity checks
+    assert(ts->c_stack_hard_limit <= ts->c_stack_soft_limit);
+    assert(ts->c_stack_soft_limit < ts->c_stack_top);
+
+    // Test the stack pointer
+#if !defined(NDEBUG) && !defined(__wasi__)
+    uintptr_t here_addr = _Py_get_machine_stack_pointer();
+    assert(ts->c_stack_soft_limit < here_addr);
+    assert(here_addr < ts->c_stack_top);
+#endif
+}
+
+
+// Public API: set the stack start address and size (in bytes) of 'tstate'.
+// Return 0 on success. If stack_size is 0, set ValueError and return -1.
+int
+PyUnstable_ThreadState_SetStack(PyThreadState *tstate,
+                                void *stack_start_addr, size_t stack_size)
+{
+    if (stack_size == 0) {
+        PyErr_SetString(PyExc_ValueError, "stack_size must be greater than 0");
+        return -1;
+    }
+
+    tstate_set_stack(tstate, stack_start_addr, stack_size);
+    return 0;
+}
+
+
+// Public API: recompute the stack limits of 'tstate' from the stack of the
+// calling thread, as reported by the operating system (or estimated from
+// the current stack pointer when that information is not available).
 void
-_Py_InitializeRecursionLimits(PyThreadState *tstate)
+PyUnstable_ThreadState_ResetStack(PyThreadState *tstate)
 {
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
 #ifdef WIN32
     ULONG_PTR low, high;
     GetCurrentThreadStackLimits(&low, &high);
-    _tstate->c_stack_top = (uintptr_t)high;
+
     ULONG guarantee = 0;
     SetThreadStackGuarantee(&guarantee);
-    _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES;
-    _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
+
+    uintptr_t start = (uintptr_t)low + guarantee + _PyOS_STACK_MARGIN_BYTES;
+    size_t size = (uintptr_t)high - start;
+    tstate_set_stack(tstate, (void*)start, size);
+
 #elif defined(__APPLE__)
     pthread_t this_thread = pthread_self();
-    void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack
-    size_t stack_size = pthread_get_stacksize_np(this_thread);
-    _tstate->c_stack_top = (uintptr_t)stack_addr;
-    _tstate->c_stack_hard_limit = _tstate->c_stack_top - stack_size;
-    _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
+    void *top = pthread_get_stackaddr_np(this_thread); // top of the stack
+    size_t size = pthread_get_stacksize_np(this_thread);
+    tstate_set_stack(tstate, (char*)top - size, size);
+
 #else
-    uintptr_t here_addr = _Py_get_machine_stack_pointer();
-/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
-/// (on alpine at least) is much smaller than expected and imposes undue limits
-/// compared to the old stack size estimation. (We assume musl is not glibc.)
+    // XXX musl supports HAVE_PTHREAD_GETATTR_NP, but the resulting stack size
+    // (on alpine at least) is much smaller than expected and imposes undue limits
+    // compared to the old stack size estimation. (We assume musl is not glibc.)
 #  if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
         !defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
-    size_t stack_size, guard_size;
-    void *stack_addr;
     pthread_attr_t attr;
+    size_t guard_size, stack_size;
+    void *stack_addr;
     int err = pthread_getattr_np(pthread_self(), &attr);
     if (err == 0) {
         err = pthread_attr_getguardsize(&attr, &guard_size);
@@ -476,25 +539,25 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate)
     }
     if (err == 0) {
         uintptr_t base = ((uintptr_t)stack_addr) + guard_size;
-        _tstate->c_stack_top = base + stack_size;
-#ifdef _Py_THREAD_SANITIZER
-        // Thread sanitizer crashes if we use a bit more than half the stack.
-        _tstate->c_stack_soft_limit = base + (stack_size / 2);
-#else
-        _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
-#endif
-        _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
-        assert(_tstate->c_stack_soft_limit < here_addr);
-        assert(here_addr < _tstate->c_stack_top);
-        return;
+        uintptr_t start = base + _PyOS_STACK_MARGIN_BYTES;
+        size_t pystack_size = (base + stack_size) - start;
+        tstate_set_stack(tstate, (void*)start, pystack_size);
     }
+    else
 #  endif
-    _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096);
-    _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE;
-    _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);
+    {
+        // Fallback when the stack cannot be queried: estimate it from the
+        // current stack pointer and Py_C_STACK_SIZE.
+        uintptr_t here_addr = _Py_get_machine_stack_pointer();
+        uintptr_t top = _Py_SIZE_ROUND_UP(here_addr, 4096);
+        uintptr_t start = top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);
+        size_t pystack_size = top - start;
+        tstate_set_stack(tstate, (void*)start, pystack_size);
+    }
 #endif
 }
 
+
 /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
    if the recursion_depth reaches recursion_limit.
*/ int diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b813166f167d70..a4e56c8e5d442c 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -868,7 +868,7 @@ pycore_interp_init(PyThreadState *tstate) { _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; if (_tstate->c_stack_hard_limit == 0) { - _Py_InitializeRecursionLimits(tstate); + PyUnstable_ThreadState_ResetStack(tstate); } PyInterpreterState *interp = tstate->interp; PyStatus status; diff --git a/Python/pystate.c b/Python/pystate.c index dbed609f29aa07..d338db7818f341 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2093,7 +2093,7 @@ _PyThreadState_Attach(PyThreadState *tstate) } _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; if (_tstate->c_stack_hard_limit == 0) { - _Py_InitializeRecursionLimits(tstate); + PyUnstable_ThreadState_ResetStack(tstate); } while (1) {