@@ -26,6 +26,9 @@ std::wstring python_home_path_w;
2626std::wstring python_executable_path_w;
2727std::map<std::string, std::tuple<PyObjectPtr, PyObjectPtr>> compilation_cache;
2828std::mutex compilation_cache_mutex;
29+ PyInterpreterStatePtr interpreter_state;
30+ std::map<std::thread::id, PyThreadStatePtr> thread_states;
31+ std::mutex thread_states_mutex;
2932
3033// Wrapper around the Python Global Interpreter Lock (GIL).
3134//
@@ -36,12 +39,64 @@ std::mutex compilation_cache_mutex;
3639//
3740// [1]: https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization
3841class PyGILGuard {
39- PyGILState_STATE state;
42+ std::thread::id thread_id;
43+
44+ // The simplest way to implement this guard is to use `PyGILState_Ensure`
45+ // and `PyGILState_Release`, however this can lead to segfaults when
46+ // using libraries depending on pybind11.
47+ //
48+ // pybind11 is a popular library for writing C extensions in Python
49+ // packages. It provies convenient C++ API on top of the Python C
50+ // API. In particular, it provides conveniences for dealing with
51+ // GIL, one of them being `gil_scoped_acquire`. The implementation
52+ // has a bug that results in a dangling pointer being used. This
53+ // bug only appears when the code runs in a non-main thread that
54+ // manages the `gil_scoped_acquire` checks if the calling thread
55+ // alread holds GIL with `PyGILState_Ensure` and `PyGILState_Release`.
56+ // Specifically, the GIL, in which case it stores the pointer to
57+ // the corresponding `PyThreadState`. After `PyGILState_Release`,
58+ // the thread state is freed, but subsequent usage of `gil_scoped_acquire`
59+ // still re-uses the pointer. This issues has been reported in [1].
60+ //
61+ // In our case, we evaluate Python code dirty scheduler threads.
62+ // This means that the threads are reused and we acquire the GIL
63+ // every time. In order to avoid the pybind11 bug, we want to avoid
64+ // using `PyGILState_Release`, and instead have a permanent `PyThreadState`
65+ // for each of the dirty scheduler threads. We do this by creating
66+ // new state when the given scheduler thread obtains the GIL for
67+ // the first time. Then, we use `PyEval_RestoreThread` and `PyEval_SaveThread`
68+ // to acquire and release the GIL respectively.
69+ //
70+ // NOTE: the dirty scheduler thread pool is fixed, so the map does
71+ // not grow beyond that. If we ever need to acquire the GIL from
72+ // other threads, we should extend this implementation to either
73+ // allow removing the state on destruction, or have a variant with
74+ // `PyGILState_Ensure` and `PyGILState_Release`, as long as it does
75+ // not fall into the bug described above.
76+ //
77+ // [1]: https://github.com/pybind/pybind11/issues/2888
4078
4179public:
42- PyGILGuard () { this ->state = PyGILState_Ensure (); }
80+ PyGILGuard () {
81+ this ->thread_id = std::this_thread::get_id ();
82+
83+ PyThreadStatePtr state;
84+
85+ {
86+ auto guard = std::lock_guard<std::mutex>(thread_states_mutex);
4387
44- ~PyGILGuard () { PyGILState_Release (this ->state ); }
88+ if (thread_states.find (this ->thread_id ) == thread_states.end ()) {
89+ // Note that PyThreadState_New does not require GIL to be held.
90+ state = PyThreadState_New (interpreter_state);
91+ } else {
92+ state = thread_states[this ->thread_id ];
93+ }
94+ }
95+
96+ PyEval_RestoreThread (state);
97+ }
98+
99+ ~PyGILGuard () { PyEval_SaveThread (); }
45100};
46101
47102// Ensures the given object refcount is decremented when the guard
@@ -275,6 +330,8 @@ fine::Ok<> init(ErlNifEnv *env, std::string python_dl_path,
275330
276331 Py_InitializeEx (0 );
277332
333+ interpreter_state = PyInterpreterState_Get ();
334+
278335 // In order to use any of the Python C API functions, the calling
279336 // thread must hold the GIL. Since every NIF call may run on a
280337 // different dirty scheduler thread, we need to acquire the GIL at
@@ -285,7 +342,7 @@ fine::Ok<> init(ErlNifEnv *env, std::string python_dl_path,
285342 // See pyo3 [1] for an extra reference.
286343 //
287344 // [1]: https://github.com/PyO3/pyo3/blob/v0.23.3/src/gil.rs#L63-L74
288- PyEval_SaveThread ();
345+ thread_states[ std::this_thread::get_id ()] = PyEval_SaveThread ();
289346
290347 is_initialized = true ;
291348
@@ -405,40 +462,6 @@ sys.stdin = Stdin()
405462
406463FINE_NIF (init, ERL_NIF_DIRTY_JOB_CPU_BOUND);
407464
408- // Note that this NIF is here for the reference, but currently we do
409- // not support deinitialization. While in principle it should be
410- // possible to reinitialize Python, it can lead to issues in practice.
411- // For example, doing so while using numpy simply does not work, see
412- // [1] for discussion points.
413- //
414- // [1]: https://bugs.python.org/issue34309
415- fine::Ok<> terminate (ErlNifEnv *env) {
416- ensure_initialized ();
417-
418- auto init_guard = std::lock_guard<std::mutex>(init_mutex);
419-
420- // Here we only acquire the GIL, since releasing after finalization
421- // makes no sense
422- PyGILState_Ensure ();
423-
424- if (Py_FinalizeEx () == -1 ) {
425- throw std::runtime_error (" failed to finalize Python interpreter" );
426- }
427-
428- is_initialized = false ;
429-
430- auto compilation_cache_guard =
431- std::lock_guard<std::mutex>(compilation_cache_mutex);
432- compilation_cache.clear ();
433-
434- // Raises runtime error on failure, which is propagated automatically
435- unload_python_library ();
436-
437- return fine::Ok<>();
438- }
439-
440- FINE_NIF (terminate, ERL_NIF_DIRTY_JOB_CPU_BOUND);
441-
442465fine::Ok<> janitor_decref (ErlNifEnv *env, uint64_t ptr) {
443466 auto init_guard = std::lock_guard<std::mutex>(init_mutex);
444467
0 commit comments