From 8de23c018fe6ec5b1f5f492a2532051bc15fce49 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Wed, 24 Sep 2025 12:35:54 +0100
Subject: [PATCH] Fix C stack limits by factoring out finding hardware stack limits

---
 Python/ceval.c | 60 ++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/Python/ceval.c b/Python/ceval.c
index 0ccaacaf3ed5b1..1b52128c858ecb 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -438,31 +438,26 @@ int pthread_attr_destroy(pthread_attr_t *a)
 }
 #endif
 
-
-void
-_Py_InitializeRecursionLimits(PyThreadState *tstate)
+static void
+hardware_stack_limits(uintptr_t *top, uintptr_t *base)
 {
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
 #ifdef WIN32
     ULONG_PTR low, high;
     GetCurrentThreadStackLimits(&low, &high);
-    _tstate->c_stack_top = (uintptr_t)high;
+    *top = (uintptr_t)high;
     ULONG guarantee = 0;
     SetThreadStackGuarantee(&guarantee);
-    _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES;
-    _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
+    *base = (uintptr_t)low + guarantee;
 #elif defined(__APPLE__)
     pthread_t this_thread = pthread_self();
     void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack
     size_t stack_size = pthread_get_stacksize_np(this_thread);
-    _tstate->c_stack_top = (uintptr_t)stack_addr;
-    _tstate->c_stack_hard_limit = _tstate->c_stack_top - stack_size;
-    _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
+    *top = (uintptr_t)stack_addr;
+    *base = ((uintptr_t)stack_addr) - stack_size;
 #else
-    uintptr_t here_addr = _Py_get_machine_stack_pointer();
-/// XXX musl supports HAVE_PTHREAD_GETATTR_NP, but the resulting stack size
-/// (on alpine at least) is much smaller than expected and imposes undue limits
-/// compared to the old stack size estimation. (We assume musl is not glibc.)
+    /// XXX musl supports HAVE_PTHREAD_GETATTR_NP, but the resulting stack size
+    /// (on alpine at least) is much smaller than expected and imposes undue limits
+    /// compared to the old stack size estimation. (We assume musl is not glibc.)
 # if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
     !defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
     size_t stack_size, guard_size;
@@ -475,26 +470,35 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate)
         err |= pthread_attr_destroy(&attr);
     }
     if (err == 0) {
-        uintptr_t base = ((uintptr_t)stack_addr) + guard_size;
-        _tstate->c_stack_top = base + stack_size;
-#ifdef _Py_THREAD_SANITIZER
-        // Thread sanitizer crashes if we use a bit more than half the stack.
-        _tstate->c_stack_soft_limit = base + (stack_size / 2);
-#else
-        _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
-#endif
-        _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
-        assert(_tstate->c_stack_soft_limit < here_addr);
-        assert(here_addr < _tstate->c_stack_top);
+        *base = ((uintptr_t)stack_addr) + guard_size;
+        *top = (uintptr_t)stack_addr + stack_size;
         return;
     }
 # endif
-    _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096);
-    _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE;
-    _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);
+    uintptr_t here_addr = _Py_get_machine_stack_pointer();
+    uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096);
+    *top = top_addr;
+    *base = top_addr - Py_C_STACK_SIZE;
 #endif
 }
 
+void
+_Py_InitializeRecursionLimits(PyThreadState *tstate)
+{
+    uintptr_t top;
+    uintptr_t base;
+    hardware_stack_limits(&top, &base);
+#ifdef _Py_THREAD_SANITIZER
+    // Thread sanitizer crashes if we use more than half the stack.
+    uintptr_t stacksize = top - base;
+    base += stacksize/2;
+#endif
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    _tstate->c_stack_top = top;
+    _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
+    _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
+}
+
 /* The function _Py_EnterRecursiveCallTstate() only calls
    _Py_CheckRecursiveCall() if the recursion_depth reaches recursion_limit. */
 int
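
For readers outside CPython, the sketch below is a minimal, self-contained
illustration of the limit scheme the patch factors out; it is not CPython
code. Its hardware_stack_limits() implements only the patch's fallback branch
(round the current stack pointer up to a page and assume a fixed-size stack
growing downwards), and MARGIN_BYTES / DEFAULT_STACK_SIZE are hypothetical
stand-ins for CPython's _PyOS_STACK_MARGIN_BYTES and Py_C_STACK_SIZE. It
shows the invariant the refactored _Py_InitializeRecursionLimits establishes:
the hard limit sits one margin above the usable base and the soft limit one
margin above that, so code that trips the soft limit still has stack left in
which to raise and report the error.

    #include <stdint.h>
    #include <stdio.h>

    #define MARGIN_BYTES (4096 * 2)      /* stand-in for _PyOS_STACK_MARGIN_BYTES */
    #define DEFAULT_STACK_SIZE (1 << 20) /* stand-in for Py_C_STACK_SIZE */

    /* Fallback strategy from the patch: round the current stack pointer up
       to a 4096-byte page boundary and treat that as the top of a
       fixed-size stack growing downwards. */
    static void
    hardware_stack_limits(uintptr_t *top, uintptr_t *base)
    {
        char marker;
        uintptr_t here = (uintptr_t)&marker;  /* approximates the stack pointer */
        uintptr_t top_addr = (here + 4095u) & ~(uintptr_t)4095u;
        *top = top_addr;
        *base = top_addr - DEFAULT_STACK_SIZE;
    }

    int
    main(void)
    {
        uintptr_t top, base;
        hardware_stack_limits(&top, &base);

        /* Same derivation as the refactored _Py_InitializeRecursionLimits:
           base < hard_limit < soft_limit < top. */
        uintptr_t hard_limit = base + MARGIN_BYTES;
        uintptr_t soft_limit = base + MARGIN_BYTES * 2;

        char probe;
        uintptr_t sp = (uintptr_t)&probe;     /* stack depth of this frame */
        printf("top  = %p\nsoft = %p\nhard = %p\nsp   = %p\n",
               (void *)top, (void *)soft_limit, (void *)hard_limit, (void *)sp);
        printf("soft limit tripped: %s\n", sp < soft_limit ? "yes" : "no");
        return 0;
    }

One consequence of the refactoring visible in the diff: the thread-sanitizer
adjustment (reserving half the stack) now runs once, after the platform
query, so it applies uniformly to the Windows, macOS, and fallback paths
rather than only to the pthread_getattr_np branch as before.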