1515
1616#include " common.h"
1717
18+ #include < atomic>
1819#include < exception>
1920#include < mutex>
2021#include < thread>
@@ -53,6 +54,7 @@ constexpr const char *internals_function_record_capsule_name = "pybind11_functio
5354inline PyTypeObject *make_static_property_type ();
5455inline PyTypeObject *make_default_metaclass ();
5556inline PyObject *make_object_base_type (PyTypeObject *metaclass);
57+ inline void translate_exception (std::exception_ptr p);
5658
5759// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new
5860// Thread Specific Storage (TSS) API.
@@ -149,6 +151,20 @@ struct instance_map_shard {
149151
150152static_assert (sizeof (instance_map_shard) % 64 == 0 ,
151153 " instance_map_shard size is not a multiple of 64 bytes" );
154+
155+ inline uint64_t round_up_to_next_pow2 (uint64_t x) {
156+ // Round-up to the next power of two.
157+ // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
158+ x--;
159+ x |= (x >> 1 );
160+ x |= (x >> 2 );
161+ x |= (x >> 4 );
162+ x |= (x >> 8 );
163+ x |= (x >> 16 );
164+ x |= (x >> 32 );
165+ x++;
166+ return x;
167+ }
152168#endif
153169
154170// / Internal data structure used to track registered instances and types.
@@ -178,9 +194,9 @@ struct internals {
178194 // extensions
179195 std::forward_list<std::string> static_strings; // Stores the std::strings backing
180196 // detail::c_str()
181- PyTypeObject *static_property_type;
182- PyTypeObject *default_metaclass;
183- PyObject *instance_base;
197+ PyTypeObject *static_property_type = nullptr ;
198+ PyTypeObject *default_metaclass = nullptr ;
199+ PyObject *instance_base = nullptr ;
184200 // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
185201 PYBIND11_TLS_KEY_INIT (tstate)
186202 PYBIND11_TLS_KEY_INIT (loader_life_support_tls_key)
@@ -189,7 +205,36 @@ struct internals {
189205
190206 type_map<PyObject *> native_enum_type_map;
191207
192- internals () = default ;
208+ internals () {
209+ PyThreadState *cur_tstate = PyThreadState_Get ();
210+ // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
211+ if (!PYBIND11_TLS_KEY_CREATE (tstate)) {
212+ pybind11_fail (
213+ " internals constructor: could not successfully initialize the tstate TSS key!" );
214+ }
215+ PYBIND11_TLS_REPLACE_VALUE (tstate, cur_tstate);
216+
217+ // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
218+ if (!PYBIND11_TLS_KEY_CREATE (loader_life_support_tls_key)) {
219+ pybind11_fail (" internals constructor: could not successfully initialize the "
220+ " loader_life_support TSS key!" );
221+ }
222+
223+ istate = cur_tstate->interp ;
224+ registered_exception_translators.push_front (&translate_exception);
225+ static_property_type = make_static_property_type ();
226+ default_metaclass = make_default_metaclass ();
227+ #ifdef Py_GIL_DISABLED
228+ // Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
229+ auto num_shards
230+ = static_cast <size_t >(round_up_to_next_pow2 (2 * std::thread::hardware_concurrency ()));
231+ if (num_shards == 0 ) {
232+ num_shards = 1 ;
233+ }
234+ instance_shards.reset (new instance_map_shard[num_shards]);
235+ instance_shards_mask = num_shards - 1 ;
236+ #endif
237+ }
193238 internals (const internals &other) = delete ;
194239 internals &operator =(const internals &other) = delete ;
195240 ~internals () {
@@ -206,6 +251,17 @@ struct internals {
206251 }
207252};
208253
254+ // the internals struct (above) is shared between all the modules. local_internals are only
255+ // for a single module. Any changes made to internals may require an update to
256+ // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
257+ // restricted to a single module. Whether a module has local internals or not should not
258+ // impact any other modules, because the only things accessing the local internals is the
259+ // module that contains them.
260+ struct local_internals {
261+ type_map<type_info *> registered_types_cpp;
262+ std::forward_list<ExceptionTranslator> registered_exception_translators;
263+ };
264+
209265enum class holder_enum_t : uint8_t {
210266 undefined,
211267 std_unique_ptr, // Default, lacking interop with std::shared_ptr.
@@ -249,15 +305,49 @@ struct type_info {
249305 " __pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) \
250306 PYBIND11_COMPILER_TYPE_LEADING_UNDERSCORE PYBIND11_PLATFORM_ABI_ID " __"
251307
252- // / Each module locally stores a pointer to the `internals` data. The data
253- // / itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
254- inline internals **&get_internals_pp () {
255- static internals **internals_pp = nullptr ;
256- return internals_pp;
308+ inline PyThreadState *get_thread_state_unchecked () {
309+ #if defined(PYPY_VERSION) || defined(GRAALVM_PYTHON)
310+ return PyThreadState_GET ();
311+ #elif PY_VERSION_HEX < 0x030D0000
312+ return _PyThreadState_UncheckedGet ();
313+ #else
314+ return PyThreadState_GetUnchecked ();
315+ #endif
257316}
258317
259- // forward decl
260- inline void translate_exception (std::exception_ptr);
318+ // / We use this counter to figure out if there are or have been multiple subinterpreters active at
319+ // / any point. This must never decrease while any interpreter may be running in any thread!
320+ inline std::atomic<int > &get_num_interpreters_seen () {
321+ static std::atomic<int > counter (0 );
322+ return counter;
323+ }
324+
325+ template <typename InternalsType>
326+ inline std::unique_ptr<InternalsType> *&get_internals_pp () {
327+ #ifdef PYBIND11_SUBINTERPRETER_SUPPORT
328+ if (get_num_interpreters_seen () > 1 ) {
329+ // Internals is one per interpreter. When multiple interpreters are alive in different
330+ // threads we have to allow them to have different internals, so we need a thread_local.
331+ static thread_local std::unique_ptr<InternalsType> *t_internals_pp = nullptr ;
332+ static thread_local PyInterpreterState *istate_cached = nullptr ;
333+ // Whenever the interpreter changes on the current thread we need to invalidate the
334+ // internals_pp so that it can be pulled from the interpreter's state dict. That is slow,
335+ // so we use the current PyThreadState to check if it is necessary. The caller will see a
336+ // null return and do the fetch from the state dict or create a new one (as needed).
337+ auto *tstate = get_thread_state_unchecked ();
338+ if (!tstate) {
339+ istate_cached = nullptr ;
340+ t_internals_pp = nullptr ;
341+ } else if (tstate->interp != istate_cached) {
342+ istate_cached = tstate->interp ;
343+ t_internals_pp = nullptr ;
344+ }
345+ return t_internals_pp;
346+ }
347+ #endif
348+ static std::unique_ptr<InternalsType> *s_internals_pp = nullptr ;
349+ return s_internals_pp;
350+ }
261351
262352template <class T ,
263353 enable_if_t <std::is_same<std::nested_exception, remove_cvref_t <T>>::value, int > = 0 >
@@ -384,49 +474,46 @@ inline object get_python_state_dict() {
384474 return state_dict;
385475}
386476
387- inline object get_internals_obj_from_state_dict (handle state_dict) {
388- return reinterpret_steal<object>(
389- dict_getitemstringref (state_dict.ptr (), PYBIND11_INTERNALS_ID));
390- }
391-
392- inline internals **get_internals_pp_from_capsule (handle obj) {
393- void *raw_ptr = PyCapsule_GetPointer (obj.ptr (), /* name=*/ nullptr );
394- if (raw_ptr == nullptr ) {
395- raise_from (PyExc_SystemError, " pybind11::detail::get_internals_pp_from_capsule() FAILED" );
396- throw error_already_set ();
477+ template <typename InternalsType>
478+ inline std::unique_ptr<InternalsType> *
479+ get_internals_pp_from_capsule_in_state_dict (dict &state_dict, char const *state_dict_key) {
480+ auto internals_obj
481+ = reinterpret_steal<object>(dict_getitemstringref (state_dict.ptr (), state_dict_key));
482+ if (internals_obj) {
483+ void *raw_ptr = PyCapsule_GetPointer (internals_obj.ptr (), /* name=*/ nullptr );
484+ if (!raw_ptr) {
485+ raise_from (PyExc_SystemError,
486+ " pybind11::detail::get_internals_pp_from_capsule_in_state_dict() FAILED" );
487+ throw error_already_set ();
488+ }
489+ return reinterpret_cast <std::unique_ptr<InternalsType> *>(raw_ptr);
397490 }
398- return static_cast <internals **>(raw_ptr);
399- }
400-
401- inline uint64_t round_up_to_next_pow2 (uint64_t x) {
402- // Round-up to the next power of two.
403- // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
404- x--;
405- x |= (x >> 1 );
406- x |= (x >> 2 );
407- x |= (x >> 4 );
408- x |= (x >> 8 );
409- x |= (x >> 16 );
410- x |= (x >> 32 );
411- x++;
412- return x;
491+ return nullptr ;
413492}
414493
415494// / Return a reference to the current `internals` data
416495PYBIND11_NOINLINE internals &get_internals () {
417- auto ** &internals_pp = get_internals_pp ();
496+ auto *&internals_pp = get_internals_pp<internals> ();
418497 if (internals_pp && *internals_pp) {
498+ // This is the fast path, everything is already setup, just return it
419499 return **internals_pp;
420500 }
421501
502+ // Slow path, something needs fetched from the state dict or created
503+
504+ // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals.
422505 gil_scoped_acquire_simple gil;
423506 error_scope err_scope;
424507
425508 dict state_dict = get_python_state_dict ();
426- if (object internals_obj = get_internals_obj_from_state_dict (state_dict)) {
427- internals_pp = get_internals_pp_from_capsule (internals_obj);
509+ internals_pp = get_internals_pp_from_capsule_in_state_dict<internals>(state_dict,
510+ PYBIND11_INTERNALS_ID);
511+ if (!internals_pp) {
512+ internals_pp = new std::unique_ptr<internals>;
513+ state_dict[PYBIND11_INTERNALS_ID] = capsule (reinterpret_cast <void *>(internals_pp));
428514 }
429- if (internals_pp && *internals_pp) {
515+
516+ if (*internals_pp) {
430517 // We loaded the internals through `state_dict`, which means that our `error_already_set`
431518 // and `builtin_exception` may be different local classes than the ones set up in the
432519 // initial exception translator, below, so add another for our local exception classes.
@@ -435,68 +522,61 @@ PYBIND11_NOINLINE internals &get_internals() {
435522 // libc++ with CPython doesn't require this (types are explicitly exported)
436523 // libc++ with PyPy still need it, awaiting further investigation
437524#if !defined(__GLIBCXX__)
438- (*internals_pp)->registered_exception_translators .push_front (&translate_local_exception);
525+ if ((*internals_pp)->registered_exception_translators .empty ()
526+ || (*internals_pp)->registered_exception_translators .front ()
527+ != &translate_local_exception) {
528+ (*internals_pp)
529+ ->registered_exception_translators .push_front (&translate_local_exception);
530+ }
439531#endif
440532 } else {
441- if (!internals_pp) {
442- internals_pp = new internals *();
443- }
444- auto *&internals_ptr = *internals_pp;
445- internals_ptr = new internals ();
533+ auto &internals_ptr = *internals_pp;
534+ internals_ptr.reset (new internals ());
446535
447- PyThreadState *tstate = PyThreadState_Get ();
448- // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
449- if (!PYBIND11_TLS_KEY_CREATE (internals_ptr->tstate )) {
450- pybind11_fail (" get_internals: could not successfully initialize the tstate TSS key!" );
451- }
452- PYBIND11_TLS_REPLACE_VALUE (internals_ptr->tstate , tstate);
453-
454- // NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
455- if (!PYBIND11_TLS_KEY_CREATE (internals_ptr->loader_life_support_tls_key )) {
456- pybind11_fail (" get_internals: could not successfully initialize the "
457- " loader_life_support TSS key!" );
458- }
459-
460- internals_ptr->istate = tstate->interp ;
461- state_dict[PYBIND11_INTERNALS_ID] = capsule (reinterpret_cast <void *>(internals_pp));
462- internals_ptr->registered_exception_translators .push_front (&translate_exception);
463- internals_ptr->static_property_type = make_static_property_type ();
464- internals_ptr->default_metaclass = make_default_metaclass ();
465- internals_ptr->instance_base = make_object_base_type (internals_ptr->default_metaclass );
466- #ifdef Py_GIL_DISABLED
467- // Scale proportional to the number of cores. 2x is a heuristic to reduce contention.
468- auto num_shards
469- = static_cast <size_t >(round_up_to_next_pow2 (2 * std::thread::hardware_concurrency ()));
470- if (num_shards == 0 ) {
471- num_shards = 1 ;
536+ if (!internals_ptr->instance_base ) {
537+ // This calls get_internals, so cannot be called from within the internals constructor
538+ // called above because internals_ptr must be set before get_internals is called again
539+ internals_ptr->instance_base = make_object_base_type (internals_ptr->default_metaclass );
472540 }
473- internals_ptr->instance_shards .reset (new instance_map_shard[num_shards]);
474- internals_ptr->instance_shards_mask = num_shards - 1 ;
475- #endif // Py_GIL_DISABLED
476541 }
542+
477543 return **internals_pp;
478544}
479545
480- // the internals struct (above) is shared between all the modules. local_internals are only
481- // for a single module. Any changes made to internals may require an update to
482- // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
483- // restricted to a single module. Whether a module has local internals or not should not
484- // impact any other modules, because the only things accessing the local internals is the
485- // module that contains them.
486- struct local_internals {
487- type_map<type_info *> registered_types_cpp;
488- std::forward_list<ExceptionTranslator> registered_exception_translators;
489- };
546+ // / A string key uniquely describing this module
547+ inline char const *get_local_internals_id () {
548+ // Use the address of this static itself as part of the key, so that the value is uniquely tied
549+ // to where the module is loaded in memory
550+ static const std::string this_module_idstr
551+ = PYBIND11_MODULE_LOCAL_ID
552+ + std::to_string (reinterpret_cast <uintptr_t >(&this_module_idstr));
553+ return this_module_idstr.c_str ();
554+ }
490555
491556// / Works like `get_internals`, but for things which are locally registered.
492557inline local_internals &get_local_internals () {
493- // Current static can be created in the interpreter finalization routine. If the later will be
494- // destroyed in another static variable destructor, creation of this static there will cause
495- // static deinitialization fiasco. In order to avoid it we avoid destruction of the
496- // local_internals static. One can read more about the problem and current solution here:
497- // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables
498- static auto *locals = new local_internals ();
499- return *locals;
558+ auto *&local_internals_pp = get_internals_pp<local_internals>();
559+ if (local_internals_pp && *local_internals_pp) {
560+ return **local_internals_pp;
561+ }
562+
563+ // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals.
564+ gil_scoped_acquire_simple gil;
565+ error_scope err_scope;
566+
567+ dict state_dict = get_python_state_dict ();
568+ local_internals_pp = get_internals_pp_from_capsule_in_state_dict<local_internals>(
569+ state_dict, get_local_internals_id ());
570+ if (!local_internals_pp) {
571+ local_internals_pp = new std::unique_ptr<local_internals>;
572+ state_dict[get_local_internals_id ()]
573+ = capsule (reinterpret_cast <void *>(local_internals_pp));
574+ }
575+ if (!*local_internals_pp) {
576+ local_internals_pp->reset (new local_internals ());
577+ }
578+
579+ return **local_internals_pp;
500580}
501581
502582#ifdef Py_GIL_DISABLED
0 commit comments