diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 4ea374861e..cd3afdfe36 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -39,6 +39,7 @@ /// further ABI-incompatible changes may be made before the ABI is officially /// changed to the new version. #ifndef PYBIND11_INTERNALS_VERSION +// REMINDER for next version bump: remove loader_life_support_tls # define PYBIND11_INTERNALS_VERSION 11 #endif @@ -260,7 +261,7 @@ struct internals { PyObject *instance_base = nullptr; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: thread_specific_storage tstate; - thread_specific_storage loader_life_support_tls; + thread_specific_storage loader_life_support_tls; // OBSOLETE (PR #5830) // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: PyInterpreterState *istate = nullptr; diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 1b23c5c681..23d9407350 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -42,24 +42,40 @@ PYBIND11_NAMESPACE_BEGIN(detail) /// Adding a patient will keep it alive up until the enclosing function returns. class loader_life_support { private: + // Thread-local top-of-stack for loader_life_support frames (linked via parent). + // Observation: loader_life_support needs to be thread-local, + // but we don't need to go to extra effort to keep it + // per-interpreter (i.e., by putting it in internals) since + // individual function calls are already isolated to a single + // interpreter, even though they could potentially call into a + // different interpreter later in the same call chain. This + // saves a significant cost per function call spent in + // loader_life_support destruction. + // Note for future C++17 simplification: + // inline static thread_local loader_life_support *tls_current_frame = nullptr; + static loader_life_support *&tls_current_frame() { + static thread_local loader_life_support *frame_ptr = nullptr; + return frame_ptr; + } + loader_life_support *parent = nullptr; std::unordered_set keep_alive; public: /// A new patient frame is created when a function is entered loader_life_support() { - auto &stack_top = get_internals().loader_life_support_tls; - parent = stack_top.get(); - stack_top = this; + auto &frame = tls_current_frame(); + parent = frame; + frame = this; } /// ... and destroyed after it returns ~loader_life_support() { - auto &stack_top = get_internals().loader_life_support_tls; - if (stack_top.get() != this) { + auto &frame = tls_current_frame(); + if (frame != this) { pybind11_fail("loader_life_support: internal error"); } - stack_top = parent; + frame = parent; for (auto *item : keep_alive) { Py_DECREF(item); } @@ -68,7 +84,7 @@ class loader_life_support { /// This can only be used inside a pybind11-bound function, either by `argument_loader` /// at argument preparation time or by `py::cast()` at execution time. PYBIND11_NOINLINE static void add_patient(handle h) { - loader_life_support *frame = get_internals().loader_life_support_tls.get(); + loader_life_support *frame = tls_current_frame(); if (!frame) { // NOTE: It would be nice to include the stack frames here, as this indicates // use of pybind11::cast<> outside the normal call framework, finding such