@@ -26,6 +26,9 @@ std::wstring python_home_path_w;
2626std::wstring python_executable_path_w;
2727std::map<std::string, std::tuple<PyObjectPtr, PyObjectPtr>> compilation_cache;
2828std::mutex compilation_cache_mutex;
29+ PyInterpreterStatePtr interpreter_state;
30+ std::map<std::thread::id, PyThreadStatePtr> thread_states;
31+ std::mutex thread_states_mutex;
2932
3033// Wrapper around the Python Global Interpreter Lock (GIL).
3134//
@@ -36,12 +39,63 @@ std::mutex compilation_cache_mutex;
3639//
3740// [1]: https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization
3841class PyGILGuard {
39- PyGILState_STATE state;
42+ // The simplest way to implement this guard is to use `PyGILState_Ensure`
43+ // and `PyGILState_Release`, however this can lead to segfaults when
44+ // using libraries depending on pybind11.
45+ //
46+ // pybind11 is a popular library for writing C extensions in Python
47+ // packages. It provides a convenient C++ API on top of the Python C
48+ // API. In particular, it provides conveniences for dealing with
49+ // the GIL, one of them being `gil_scoped_acquire`. The implementation
50+ // has a bug that results in a dangling pointer being used. This
51+ // bug only appears when the code runs in a non-main thread that
52+ // manages the GIL with `PyGILState_Ensure` and `PyGILState_Release`.
53+ // Specifically, `gil_scoped_acquire` checks if the calling thread
54+ // already holds the GIL, in which case it stores the pointer to
55+ // the corresponding `PyThreadState`. After `PyGILState_Release`,
56+ // the thread state is freed, but subsequent usage of `gil_scoped_acquire`
57+ // still re-uses the pointer. This issue has been reported in [1].
58+ //
59+ // In our case, we evaluate Python code on dirty scheduler threads.
60+ // This means that the threads are reused and we acquire the GIL
61+ // every time. In order to avoid the pybind11 bug, we want to avoid
62+ // using `PyGILState_Release`, and instead have a permanent `PyThreadState`
63+ // for each of the dirty scheduler threads. We do this by creating
64+ // a new state when the given scheduler thread obtains the GIL for
65+ // the first time. Then, we use `PyEval_RestoreThread` and `PyEval_SaveThread`
66+ // to acquire and release the GIL respectively.
67+ //
68+ // NOTE: the dirty scheduler thread pool is fixed, so the map does
69+ // not grow beyond that. If we ever need to acquire the GIL from
70+ // other threads, we should extend this implementation to either
71+ // allow removing the state on destruction, or have a variant with
72+ // `PyGILState_Ensure` and `PyGILState_Release`, as long as it does
73+ // not fall into the bug described above.
74+ //
75+ // [1]: https://github.com/pybind/pybind11/issues/2888
4076
4177public:
42- PyGILGuard () { this ->state = PyGILState_Ensure (); }
78+ PyGILGuard () {
79+ auto thread_id = std::this_thread::get_id ();
80+
81+ PyThreadStatePtr state;
82+
83+ {
84+ auto guard = std::lock_guard<std::mutex>(thread_states_mutex);
85+
86+ if (thread_states.find (thread_id) == thread_states.end ()) {
87+ // Note that PyThreadState_New does not require GIL to be held.
88+ state = PyThreadState_New (interpreter_state);
89+ thread_states[thread_id] = state;
90+ } else {
91+ state = thread_states[thread_id];
92+ }
93+ }
94+
95+ PyEval_RestoreThread (state);
96+ }
4397
44- ~PyGILGuard () { PyGILState_Release ( this -> state ); }
98+ ~PyGILGuard () { PyEval_SaveThread ( ); }
4599};
46100
47101// Ensures the given object refcount is decremented when the guard
@@ -275,6 +329,8 @@ fine::Ok<> init(ErlNifEnv *env, std::string python_dl_path,
275329
276330 Py_InitializeEx (0 );
277331
332+ interpreter_state = PyInterpreterState_Get ();
333+
278334 // In order to use any of the Python C API functions, the calling
279335 // thread must hold the GIL. Since every NIF call may run on a
280336 // different dirty scheduler thread, we need to acquire the GIL at
@@ -285,7 +341,7 @@ fine::Ok<> init(ErlNifEnv *env, std::string python_dl_path,
285341 // See pyo3 [1] for an extra reference.
286342 //
287343 // [1]: https://github.com/PyO3/pyo3/blob/v0.23.3/src/gil.rs#L63-L74
288- PyEval_SaveThread ();
344+ thread_states[ std::this_thread::get_id ()] = PyEval_SaveThread ();
289345
290346 is_initialized = true ;
291347
@@ -405,40 +461,6 @@ sys.stdin = Stdin()
405461
406462FINE_NIF (init, ERL_NIF_DIRTY_JOB_CPU_BOUND);
407463
408- // Note that this NIF is here for the reference, but currently we do
409- // not support deinitialization. While in principle it should be
410- // possible to reinitialize Python, it can lead to issues in practice.
411- // For example, doing so while using numpy simply does not work, see
412- // [1] for discussion points.
413- //
414- // [1]: https://bugs.python.org/issue34309
415- fine::Ok<> terminate (ErlNifEnv *env) {
416- ensure_initialized ();
417-
418- auto init_guard = std::lock_guard<std::mutex>(init_mutex);
419-
420- // Here we only acquire the GIL, since releasing after finalization
421- // makes no sense
422- PyGILState_Ensure ();
423-
424- if (Py_FinalizeEx () == -1 ) {
425- throw std::runtime_error (" failed to finalize Python interpreter" );
426- }
427-
428- is_initialized = false ;
429-
430- auto compilation_cache_guard =
431- std::lock_guard<std::mutex>(compilation_cache_mutex);
432- compilation_cache.clear ();
433-
434- // Raises runtime error on failure, which is propagated automatically
435- unload_python_library ();
436-
437- return fine::Ok<>();
438- }
439-
440- FINE_NIF (terminate, ERL_NIF_DIRTY_JOB_CPU_BOUND);
441-
442464fine::Ok<> janitor_decref (ErlNifEnv *env, uint64_t ptr) {
443465 auto init_guard = std::lock_guard<std::mutex>(init_mutex);
444466
0 commit comments