Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 8 additions & 22 deletions pyo3-ffi/src/cpython/pystate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use std::ffi::c_int;
// skipped _PyInterpreterState_RequiresIDRef
// skipped _PyInterpreterState_RequireIDRef

// skipped _PyInterpreterState_GetMainModule

pub type Py_tracefunc = unsafe extern "C" fn(
obj: *mut PyObject,
frame: *mut PyFrameObject,
Expand All @@ -24,8 +22,8 @@ pub const PyTrace_C_EXCEPTION: c_int = 5;
pub const PyTrace_C_RETURN: c_int = 6;
pub const PyTrace_OPCODE: c_int = 7;

// skipped PyTraceInfo
// skipped CFrame
// skipped Py_MAX_SCRIPT_PATH_SIZE
// skipped _PyRemoteDebuggerSupport

/// Private structure used inline in `PyGenObject`
#[cfg(not(PyPy))]
Expand All @@ -44,14 +42,16 @@ pub(crate) struct _PyErr_StackItem {
// skipped _ts (aka PyThreadState)

extern "C" {
// skipped _PyThreadState_Prealloc
// skipped _PyThreadState_UncheckedGet
// skipped _PyThreadState_GetDict

#[cfg(Py_3_13)]
pub fn PyThreadState_GetUnchecked() -> *mut PyThreadState;

// skipped PyThreadState_EnterTracing
// skipped PyThreadState_LeaveTracing

#[cfg_attr(PyPy, link_name = "PyPyGILState_Check")]
pub fn PyGILState_Check() -> c_int;

// skipped _PyGILState_GetInterpreterStateUnsafe
// skipped _PyThread_CurrentFrames
// skipped _PyThread_CurrentExceptions

Expand Down Expand Up @@ -97,17 +97,3 @@ extern "C" {
eval_frame: _PyFrameEvalFunction,
);
}

// skipped _PyInterpreterState_GetConfig
// skipped _PyInterpreterState_GetConfigCopy
// skipped _PyInterpreterState_SetConfig
// skipped _Py_GetConfig

// skipped _PyCrossInterpreterData
// skipped _PyObject_GetCrossInterpreterData
// skipped _PyCrossInterpreterData_NewObject
// skipped _PyCrossInterpreterData_Release
// skipped _PyObject_CheckCrossInterpreterData
// skipped crossinterpdatafunc
// skipped _PyCrossInterpreterData_RegisterClass
// skipped _PyCrossInterpreterData_Lookup
117 changes: 98 additions & 19 deletions src/internal/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ std::thread_local! {
///
/// Additionally, we sometimes need to prevent safe access to the Python interpreter,
/// e.g. when implementing `__traverse__`, which is represented by a negative value.
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
static ATTACH_COUNT: Cell<isize> = const { Cell::new(0) };

/// On Python 3.13+ we can use PyThreadState_GetUnchecked to peek the attached thread state,
/// however that still returns a thread state during `__traverse__` so we need to defend
/// against that.
#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
static ATTACH_FORBIDDEN: Cell<bool> = const { Cell::new(false) };
}

const ATTACH_FORBIDDEN_DURING_TRAVERSE: isize = -1;
Expand All @@ -32,7 +39,16 @@ const ATTACH_FORBIDDEN_DURING_TRAVERSE: isize = -1;
/// which could lead to incorrect conclusions that the thread is attached.
#[inline(always)]
fn thread_is_attached() -> bool {
ATTACH_COUNT.try_with(|c| c.get() > 0).unwrap_or(false)
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
{
ATTACH_COUNT.try_with(|c| c.get() > 0).unwrap_or(false)
}

#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
{
let tstate = unsafe { ffi::PyThreadState_GetUnchecked() };
(!tstate.is_null()) && !is_in_gc_traversal()
}
}

/// RAII type that represents thread attachment to the interpreter.
Expand Down Expand Up @@ -72,6 +88,7 @@ impl AttachGuard {
/// Variant of the above which will will return `None` if the interpreter cannot be attached to.
#[cfg(any(not(Py_LIMITED_API), Py_3_11, test))] // see Python::try_attach
pub(crate) fn try_acquire() -> Option<Self> {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
match ATTACH_COUNT.try_with(|c| c.get()) {
Ok(i) if i > 0 => {
// SAFETY: We just checked that the thread is already attached.
Expand All @@ -83,6 +100,19 @@ impl AttachGuard {
_ => {}
}

// Annoyingly, `PyThreadState_GetUnchecked` can return non-null when in the GC, so
// we have to check if we're in GC traversal.
if is_in_gc_traversal() {
return None;
}
Comment on lines +103 to +107
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @ZeroIntensity is this behaviour of PyThreadState_GetUnchecked returning non-null when inside GC traversal known? It's possible for user code to observe this in implementations of tp_traverse handlers. (Yes, in theory they should not do anything other than visit types in those calls, but we cannot safely guarantee that!)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, what was being used previously that returned NULL inside the GC?

As far as I know, Python has always required that tp_traverse is called with an attached thread state, because:

  1. They're allowed to call arbitrary Python code (though doing so is rare).
  2. More importantly, they access fields protected by the GIL or stop-the-world.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, what was being used previously that returned NULL inside the GC?

PyO3 was using an internal TLS counter (ATTACH_COUNT in the diff here) because we couldn't rely on PyGILState_Check, and using PyGILState_Ensure was a performance drag. Looks like PyThreadState_GetUnchecked is fast enough, and probably would be as fast as our internal TLS counter if we didn't have to do a second TLS check for GC, so it would be great to make that rule that second check unnecessary.

As far as I know, Python has always required that tp_traverse is called with an attached thread state, because

This surprises me, because we've had several crashes reported over time when arbitrary code has been run inside tp_traverse, especially related to Py_DecRef (which we have to assume arbitrary code will call). e.g. #3165, #4479. As a result we banned attaching to the interpreter while inside a tp_traverse handler.

More importantly, they access fields protected by the GIL or stop-the-world.

I think that is not incompatible with the thread being "detached", because GC is defined as a stop-the-world event?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok, you're right on the first part -- tp_traverse is not allowed to call Python code. On the free-threaded build, tp_traverse handlers are called during a stop-the-world, so it is definitely not allowed to enter the eval loop. Sorry about the confusion there!

As a result we banned attaching to the interpreter while inside a tp_traverse handler.

We don't have any explicit behavior for disallowing this in CPython. We just follow the usual rules; we either hold the GIL or make a stop-the-world pause so that no other thread can be attached. I think this is just a difference of the "consenting adults" approach in C vs the memory-safety approach in Rust.

I think that is not incompatible with the thread being "detached", because GC is defined as a stop-the-world event?

I haven't looked too deep into it, but it might be safe to detach the thread state around tp_traverse calls, depending on what state we currently allow them to access. It's just not too useful for us to do so, because it will likely regress performance and also might break some weird-but-valid use cases (for example, calling Py_INCREF in a tp_traverse handler, but not Py_DECREF).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I previously mentioned Py_DECREF as a concern it's worth pointing out that at least on the free-threaded build Py_INCREF is also a problem: python/cpython#123241

It seems like there's some clarification needed on what is allowed operation during tp_traverse and the free-threaded build has made the problem worse, potentially. Until it's documented as permitted my instinct is to allow nothing.

I think this is just a difference of the "consenting adults" approach in C vs the memory-safety approach in Rust.

Perhaps, note that in PyO3 a user can still just do unsafe { Python::assume_attached() } to get "proof" that they're attached to the interpreter, but this would at least stick out as a big red flag in code review to analyze carefully.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(we're in the process of cleaning up API names, currently it's called assume_gil_acquired if you happen to search for this)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Py_INCREF should generally be allowed in tp_traverse. It's a bug that the free-threaded GC doesn't deal with it.

If you're up to it, feel free to submit a PR adding some clarification to tp_traverse. Otherwise, I can do it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for falling off the thread a little - I'm happy to submit a PR, however I might take a little while to get around to it 👍


// SAFETY: always safe to check if the tstate pointer is non-null
let has_tstate = !(unsafe { ffi::PyThreadState_GetUnchecked().is_null() });
if has_tstate {
// SAFETY: we already checked that we're not in GC
return Some(unsafe { Self::assume() });
}

// SAFETY: These APIs are always sound to call
if unsafe { ffi::Py_IsInitialized() } == 0 || is_finalizing() {
// If the interpreter is not initialized, we cannot attach.
Expand Down Expand Up @@ -216,21 +246,28 @@ fn get_pool() -> &'static ReferencePool {

/// A guard which can be used to temporarily detach from the interpreter and restore on `Drop`.
pub(crate) struct SuspendAttach {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
count: isize,
tstate: *mut ffi::PyThreadState,
}

impl SuspendAttach {
pub(crate) unsafe fn new() -> Self {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
let count = ATTACH_COUNT.with(|c| c.replace(0));
let tstate = unsafe { ffi::PyEval_SaveThread() };

Self { count, tstate }
Self {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
count,
tstate,
}
}
}

impl Drop for SuspendAttach {
fn drop(&mut self) {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
ATTACH_COUNT.with(|c| c.set(self.count));
unsafe {
ffi::PyEval_RestoreThread(self.tstate);
Expand All @@ -246,19 +283,32 @@ impl Drop for SuspendAttach {

/// Used to lock safe access to the interpreter
pub(crate) struct ForbidAttaching {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
count: isize,
}

impl ForbidAttaching {
/// Lock access to the interpreter while an implementation of `__traverse__` is running
pub fn during_traverse() -> Self {
Self::new(ATTACH_FORBIDDEN_DURING_TRAVERSE)
Self::new(
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
ATTACH_FORBIDDEN_DURING_TRAVERSE,
)
}

fn new(reason: isize) -> Self {
let count = ATTACH_COUNT.with(|c| c.replace(reason));
fn new(#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))] reason: isize) -> Self {
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
{
let count = ATTACH_COUNT.with(|c| c.replace(reason));

Self { count }
}

Self { count }
#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
{
ATTACH_FORBIDDEN.with(|c| c.set(true));
Self {}
}
}

#[cold]
Expand All @@ -274,7 +324,15 @@ impl ForbidAttaching {

impl Drop for ForbidAttaching {
fn drop(&mut self) {
ATTACH_COUNT.with(|c| c.set(self.count));
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
{
ATTACH_COUNT.with(|c| c.set(self.count));
}

#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
{
ATTACH_FORBIDDEN.with(|c| c.set(false));
}
}
}

Expand Down Expand Up @@ -321,37 +379,57 @@ pub unsafe fn register_decref(obj: NonNull<ffi::PyObject>) {

/// Private helper function to check if we are currently in a GC traversal (as detected by PyO3).
#[cfg(any(not(Py_LIMITED_API), Py_3_11))]
#[inline(always)]
pub(crate) fn is_in_gc_traversal() -> bool {
ATTACH_COUNT
.try_with(|c| c.get() == ATTACH_FORBIDDEN_DURING_TRAVERSE)
.unwrap_or(false)
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
{
ATTACH_COUNT
.try_with(|c| c.get() == ATTACH_FORBIDDEN_DURING_TRAVERSE)
.unwrap_or(false)
}

#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
{
ATTACH_FORBIDDEN.try_with(|c| c.get()).unwrap_or(false)
}
}

/// Increments pyo3's internal attach count - to be called whenever an AttachGuard is created.
#[inline(always)]
fn increment_attach_count() {
// Ignores the error in case this function called from `atexit`.
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
let _ = ATTACH_COUNT.try_with(|c| {
let current = c.get();
if current < 0 {
ForbidAttaching::bail(current);
}
c.set(current + 1);
});

#[cfg(all(Py_3_13, not(Py_LIMITED_API)))]
{
if is_in_gc_traversal() {
ForbidAttaching::bail(ATTACH_FORBIDDEN_DURING_TRAVERSE);
}
}
}

/// Decrements pyo3's internal attach count - to be called whenever AttachGuard is dropped.
#[inline(always)]
fn decrement_attach_count() {
// Ignores the error in case this function called from `atexit`.
let _ = ATTACH_COUNT.try_with(|c| {
let current = c.get();
debug_assert!(
current > 0,
"Negative attach count detected. Please report this error to the PyO3 repo as a bug."
);
c.set(current - 1);
});
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
{
// Ignores the error in case this function called from `atexit`.
let _ = ATTACH_COUNT.try_with(|c| {
let current = c.get();
debug_assert!(
current > 0,
"Negative attach count detected. Please report this error to the PyO3 repo as a bug."
);
c.set(current - 1);
});
}
}

#[cfg(test)]
Expand Down Expand Up @@ -444,6 +522,7 @@ mod tests {

#[test]
#[allow(deprecated)]
#[cfg(not(all(Py_3_13, not(Py_LIMITED_API))))]
fn test_attach_counts() {
// Check `attach` and AttachGuard both increase counts correctly
let get_attach_count = || ATTACH_COUNT.with(|c| c.get());
Expand Down
Loading