diff --git a/library/core/src/alloc/global.rs b/library/core/src/alloc/global.rs index 5bf6f143b4f82..a572ba12baa4b 100644 --- a/library/core/src/alloc/global.rs +++ b/library/core/src/alloc/global.rs @@ -115,6 +115,31 @@ use crate::{cmp, ptr}; /// Whether allocations happen or not is not part of the program behavior, even if it /// could be detected via an allocator that tracks allocations by printing or otherwise /// having side effects. +/// +/// # Re-entrance +/// +/// When implementing a global allocator one has to be careful not to create an infinitely recursive +/// implementation by accident, as many constructs in the Rust standard library may allocate in +/// their implementation. For example, on some platforms [`std::sync::Mutex`] may allocate, so using +/// it is highly problematic in a global allocator. +/// +/// Generally speaking for this reason one should stick to library features available through +/// [`core`], and avoid using [`std`] in a global allocator. A few features from [`std`] are +/// guaranteed to not use `#[global_allocator]` to allocate: +/// +/// - [`std::thread_local`], +/// - [`std::thread::current`], +/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and +/// [`Clone`] implementation. +/// +/// [`std`]: ../../std/index.html +/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html +/// [`std::thread_local`]: ../../std/macro.thread_local.html +/// [`std::thread::current`]: ../../std/thread/fn.current.html +/// [`std::thread::park`]: ../../std/thread/fn.park.html +/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html +/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark + #[stable(feature = "global_alloc", since = "1.28.0")] pub unsafe trait GlobalAlloc { /// Allocates memory as described by the given `layout`. diff --git a/library/std/src/alloc.rs b/library/std/src/alloc.rs index 1d61630269ac3..f903a61c2a5d0 100644 --- a/library/std/src/alloc.rs +++ b/library/std/src/alloc.rs @@ -11,7 +11,7 @@ //! //! This attribute allows configuring the choice of global allocator. //! You can use this to implement a completely custom global allocator -//! to route all default allocation requests to a custom object. +//! to route all[^system-alloc] default allocation requests to a custom object. //! //! ```rust //! use std::alloc::{GlobalAlloc, System, Layout}; @@ -52,6 +52,13 @@ //! //! The `#[global_allocator]` can only be used once in a crate //! or its recursive dependencies. +//! +//! [^system-alloc]: Note that the Rust standard library internals may still +//! directly call [`System`] when necessary (for example for the runtime +//! support typically required to implement a global allocator, see [re-entrance] on [`GlobalAlloc`] +//! for more details). +//! +//! [re-entrance]: trait.GlobalAlloc.html#re-entrance #![deny(unsafe_op_in_unsafe_fn)] #![stable(feature = "alloc_module", since = "1.28.0")] diff --git a/library/std/src/sys/pal/sgx/abi/mod.rs b/library/std/src/sys/pal/sgx/abi/mod.rs index b8c4d7740c4e1..1c6c681d4c179 100644 --- a/library/std/src/sys/pal/sgx/abi/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/mod.rs @@ -3,6 +3,7 @@ use core::arch::global_asm; use core::sync::atomic::{Atomic, AtomicUsize, Ordering}; +use crate::alloc::System; use crate::io::Write; // runtime features @@ -63,7 +64,9 @@ unsafe extern "C" fn tcs_init(secondary: bool) { #[unsafe(no_mangle)] extern "C" fn entry(p1: u64, p2: u64, p3: u64, secondary: bool, p4: u64, p5: u64) -> EntryReturn { // FIXME: how to support TLS in library mode? 
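As an illustration of the re-entrance rules documented above, here is a minimal sketch of a re-entrance-safe global allocator. `CountingAlloc` and its field name are invented for the example; the point is that it only touches `core` atomics and defers all real work to `System`, so it can never recurse into itself:

```rust
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

struct CountingAlloc {
    live_allocations: AtomicUsize,
}

unsafe impl GlobalAlloc for CountingAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // Atomics from `core` never allocate, so this bookkeeping
        // cannot re-enter the global allocator.
        self.live_allocations.fetch_add(1, Ordering::Relaxed);
        unsafe { System.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        self.live_allocations.fetch_sub(1, Ordering::Relaxed);
        unsafe { System.dealloc(ptr, layout) }
    }
}

#[global_allocator]
static ALLOC: CountingAlloc = CountingAlloc { live_allocations: AtomicUsize::new(0) };

fn main() {
    let v = vec![1, 2, 3];
    drop(v);
    println!("live: {}", ALLOC.live_allocations.load(Ordering::Relaxed));
}
```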
- let tls = Box::new(tls::Tls::new()); + // We use the System allocator here such that the global allocator may use + // thread-locals. + let tls = Box::new_in(tls::Tls::new(), System); let tls_guard = unsafe { tls.activate() }; if secondary { diff --git a/library/std/src/sys/pal/sgx/abi/tls/mod.rs b/library/std/src/sys/pal/sgx/abi/tls/mod.rs index 41e38b6961680..553814dcb5fda 100644 --- a/library/std/src/sys/pal/sgx/abi/tls/mod.rs +++ b/library/std/src/sys/pal/sgx/abi/tls/mod.rs @@ -89,13 +89,6 @@ impl Tls { ActiveTls { tls: self } } - #[allow(unused)] - pub unsafe fn activate_persistent(self: Box) { - // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. - let ptr = Box::into_raw(self).cast_const().cast::(); - unsafe { set_tls_ptr(ptr) }; - } - unsafe fn current<'a>() -> &'a Tls { // FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition. unsafe { &*(get_tls_ptr() as *const Tls) } diff --git a/library/std/src/sys/thread/hermit.rs b/library/std/src/sys/thread/hermit.rs index 4d9f3b114c2a0..faeaa9ae2dfcc 100644 --- a/library/std/src/sys/thread/hermit.rs +++ b/library/std/src/sys/thread/hermit.rs @@ -1,4 +1,5 @@ use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -16,14 +17,14 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20; impl Thread { pub unsafe fn new_with_coreid( stack: usize, - p: Box, + init: Box, core_id: isize, ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + let data = Box::into_raw(init); let tid = unsafe { hermit_abi::spawn2( thread_start, - p.expose_provenance(), + data.expose_provenance(), hermit_abi::Priority::into(hermit_abi::NORMAL_PRIO), stack, core_id, @@ -31,35 +32,34 @@ impl Thread { }; return if tid == 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::const_error!(io::ErrorKind::Uncategorized, "unable to create thread!")) } else { Ok(Thread { tid }) }; - extern "C" fn thread_start(main: usize) { - unsafe { - // Finally, let's run some code. - Box::from_raw(ptr::with_exposed_provenance::>(main).cast_mut())(); + extern "C" fn thread_start(data: usize) { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = + unsafe { Box::from_raw(ptr::with_exposed_provenance_mut::(data)) }; + let rust_start = init.init(); + rust_start(); - // run all destructors + // Run all destructors. 
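Every backend in this PR uses the same leak-and-recreate handoff that hermit shows above (hermit additionally round-trips the pointer through `expose_provenance` because its spawn ABI takes a `usize`). A minimal standalone model, with invented names (`Payload` stands in for the private `ThreadInit`):

```rust
use core::ffi::c_void;

// Stand-in payload for the sketch; in std this is `thread::ThreadInit`.
struct Payload {
    entry: Box<dyn FnOnce() + Send>,
}

// Spawning side: leak the box so a plain pointer can cross the C ABI.
fn into_ffi(payload: Box<Payload>) -> *mut c_void {
    Box::into_raw(payload) as *mut c_void
}

// New thread: recreate the box exactly once and run the entry point.
unsafe extern "C" fn thread_start(data: *mut c_void) {
    // SAFETY: `data` came from `into_ffi` and is consumed only here.
    let payload = unsafe { Box::from_raw(data as *mut Payload) };
    (payload.entry)();
}

fn main() {
    let raw = into_ffi(Box::new(Payload { entry: Box::new(|| println!("in thread")) }));
    // For the sketch, call the entry point on the current thread.
    unsafe { thread_start(raw) };
}
```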
+ unsafe { crate::sys::thread_local::destructors::run(); - crate::rt::thread_cleanup(); } + crate::rt::thread_cleanup(); } } - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { unsafe { - Thread::new_with_coreid(stack, p, -1 /* = no specific core */) + Thread::new_with_coreid(stack, init, -1 /* = no specific core */) } } diff --git a/library/std/src/sys/thread/sgx.rs b/library/std/src/sys/thread/sgx.rs index f20ef7d86b9c7..9e6dcfa16713d 100644 --- a/library/std/src/sys/thread/sgx.rs +++ b/library/std/src/sys/thread/sgx.rs @@ -2,6 +2,7 @@ use crate::io; use crate::sys::pal::abi::{thread, usercalls}; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(task_queue::JoinHandle); @@ -13,6 +14,7 @@ pub use self::task_queue::JoinNotifier; mod task_queue { use super::wait_notify; use crate::sync::{Mutex, MutexGuard}; + use crate::thread::ThreadInit; pub type JoinHandle = wait_notify::Waiter; @@ -25,19 +27,20 @@ mod task_queue { } pub(super) struct Task { - p: Box, + init: Box, done: JoinNotifier, } impl Task { - pub(super) fn new(p: Box) -> (Task, JoinHandle) { + pub(super) fn new(init: Box) -> (Task, JoinHandle) { let (done, recv) = wait_notify::new(); let done = JoinNotifier(Some(done)); - (Task { p, done }, recv) + (Task { init, done }, recv) } pub(super) fn run(self) -> JoinNotifier { - (self.p)(); + let rust_start = self.init.init(); + rust_start(); self.done } } @@ -93,14 +96,10 @@ pub mod wait_notify { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, init: Box) -> io::Result { let mut queue_lock = task_queue::lock(); unsafe { usercalls::launch_thread()? }; - let (task, handle) = task_queue::Task::new(p); + let (task, handle) = task_queue::Task::new(init); queue_lock.push(task); Ok(Thread(handle)) } diff --git a/library/std/src/sys/thread/solid.rs b/library/std/src/sys/thread/solid.rs index 46a84faa80225..5953c0e7b6129 100644 --- a/library/std/src/sys/thread/solid.rs +++ b/library/std/src/sys/thread/solid.rs @@ -8,6 +8,7 @@ use crate::sync::atomic::{Atomic, AtomicUsize, Ordering}; use crate::sys::pal::itron::error::{ItronError, expect_success, expect_success_aborting}; use crate::sys::pal::itron::time::dur2reltims; use crate::sys::pal::itron::{abi, task}; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{hint, io}; @@ -27,9 +28,9 @@ unsafe impl Sync for Thread {} /// State data shared between a parent thread and child thread. It's dropped on /// a transition to one of the final states. struct ThreadInner { - /// This field is used on thread creation to pass a closure from + /// This field is used on thread creation to pass initialization data from /// `Thread::new` to the created task. - start: UnsafeCell>>, + init: UnsafeCell>>, /// A state machine. Each transition is annotated with `[...]` in the /// source code. @@ -65,7 +66,7 @@ struct ThreadInner { lifecycle: Atomic, } -// Safety: The only `!Sync` field, `ThreadInner::start`, is only touched by +// Safety: The only `!Sync` field, `ThreadInner::init`, is only touched by // the task represented by `ThreadInner`. unsafe impl Sync for ThreadInner {} @@ -84,13 +85,9 @@ impl Thread { /// # Safety /// /// See `thread::Builder::spawn_unchecked` for safety requirements. 
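The SGX `Task::run` above shows the two-phase start contract in its purest form: `init()` first publishes the current-thread handle, and only the closure it returns runs user code. A standalone model of that ordering, with all names invented:

```rust
// Stand-ins for the private std types, to make the ordering explicit.
struct Init {
    entry: Box<dyn FnOnce()>,
}

impl Init {
    // Phase 1: publish the current-thread handle; must not allocate
    // through the global allocator.
    fn init(self: Box<Self>) -> Box<dyn FnOnce()> {
        self.entry
    }
}

fn run_task(init: Box<Init>) {
    let rust_start = init.init(); // phase 1
    rust_start(); // phase 2: user code, may allocate freely
}

fn main() {
    run_task(Box::new(Init { entry: Box::new(|| println!("user code")) }));
}
```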
- pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { + pub unsafe fn new(stack: usize, init: Box) -> io::Result { let inner = Box::new(ThreadInner { - start: UnsafeCell::new(ManuallyDrop::new(p)), + init: UnsafeCell::new(ManuallyDrop::new(init)), lifecycle: AtomicUsize::new(LIFECYCLE_INIT), }); @@ -100,10 +97,11 @@ impl Thread { let inner = unsafe { &*p_inner }; // Safety: Since `trampoline` is called only once for each - // `ThreadInner` and only `trampoline` touches `start`, - // `start` contains contents and is safe to mutably borrow. - let p = unsafe { ManuallyDrop::take(&mut *inner.start.get()) }; - p(); + // `ThreadInner` and only `trampoline` touches `init`, + // `init` contains contents and is safe to mutably borrow. + let init = unsafe { ManuallyDrop::take(&mut *inner.init.get()) }; + let rust_start = init.init(); + rust_start(); // Fix the current thread's state just in case, so that the // destructors won't abort diff --git a/library/std/src/sys/thread/teeos.rs b/library/std/src/sys/thread/teeos.rs index cad100395c9f8..5e71f757eaa4b 100644 --- a/library/std/src/sys/thread/teeos.rs +++ b/library/std/src/sys/thread/teeos.rs @@ -1,5 +1,6 @@ use crate::mem::{self, ManuallyDrop}; use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{cmp, io, ptr}; @@ -24,12 +25,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -62,16 +59,16 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. - drop(unsafe { Box::from_raw(p) }); + drop(unsafe { Box::from_raw(data) }); Err(io::Error::from_raw_os_error(ret)) } else { // The new thread will start running earliest after the next yield. @@ -80,15 +77,11 @@ impl Thread { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. - // this is not necessary in TEE. - //let _handler = stack_overflow::Handler::new(); - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
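The `UnsafeCell<ManuallyDrop<_>>` idiom used by `ThreadInner` can be isolated as follows (a sketch; `Slot` is an invented name). The parent writes exactly once at construction, the child takes exactly once, so no synchronization and no drop flag are needed:

```rust
use core::cell::UnsafeCell;
use core::mem::ManuallyDrop;

struct Slot<T>(UnsafeCell<ManuallyDrop<T>>);

// Mirrors the `unsafe impl Sync for ThreadInner` above: only the single
// consumer ever touches the cell.
unsafe impl<T: Send> Sync for Slot<T> {}

impl<T> Slot<T> {
    fn new(value: T) -> Self {
        Slot(UnsafeCell::new(ManuallyDrop::new(value)))
    }

    /// SAFETY: must be called at most once, by the sole consumer.
    unsafe fn take(&self) -> T {
        unsafe { ManuallyDrop::take(&mut *self.0.get()) }
    }
}

fn main() {
    let slot = Slot::new(String::from("payload"));
    let taken = unsafe { slot.take() };
    assert_eq!(taken, "payload");
}
```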
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/unix.rs b/library/std/src/sys/thread/unix.rs index 2d2c4f9021288..21f362edced03 100644 --- a/library/std/src/sys/thread/unix.rs +++ b/library/std/src/sys/thread/unix.rs @@ -14,6 +14,7 @@ use crate::sys::weak::dlsym; #[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))] use crate::sys::weak::weak; use crate::sys::{os, stack_overflow}; +use crate::thread::{ThreadInit, current}; use crate::time::Duration; use crate::{cmp, io, ptr}; #[cfg(not(any( @@ -30,11 +31,6 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024; #[cfg(any(target_os = "espidf", target_os = "nuttx"))] pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used -struct ThreadData { - name: Option>, - f: Box, -} - pub struct Thread { id: libc::pthread_t, } @@ -47,12 +43,8 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - name: Option<&str>, - f: Box, - ) -> io::Result { - let data = Box::into_raw(Box::new(ThreadData { name: name.map(Box::from), f })); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = mem::zeroed(); let mut attr: mem::MaybeUninit = mem::MaybeUninit::uninit(); assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0); @@ -118,12 +110,16 @@ impl Thread { extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { unsafe { - let data = Box::from_raw(data as *mut ThreadData); - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. - let _handler = stack_overflow::Handler::new(data.name); - // Finally, let's run some code. - (data.f)(); + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = Box::from_raw(data as *mut ThreadInit); + let rust_start = init.init(); + + // Set up our thread name and stack overflow handler which may get triggered + // if we run out of stack. 
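With the name now carried inside the `Thread` handle that `ThreadInit::init()` installs, the old per-backend `ThreadData { name, f }` struct becomes unnecessary. A user-level sketch of the lookup the unix backend performs here:

```rust
fn main() {
    // After `init()` has published the handle, the platform layer can
    // recover the thread name from it instead of a separate field:
    let thread = std::thread::current();
    let name: Option<Box<str>> = thread.name().map(Box::from);
    println!("{name:?}");
}
```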
+ let thread = current(); + let _handler = stack_overflow::Handler::new(thread.name().map(Box::from)); + + rust_start(); } ptr::null_mut() } diff --git a/library/std/src/sys/thread/unsupported.rs b/library/std/src/sys/thread/unsupported.rs index a5001efa3b405..2d8524f825499 100644 --- a/library/std/src/sys/thread/unsupported.rs +++ b/library/std/src/sys/thread/unsupported.rs @@ -1,6 +1,7 @@ use crate::ffi::CStr; use crate::io; use crate::num::NonZero; +use crate::thread::ThreadInit; use crate::time::Duration; pub struct Thread(!); @@ -9,11 +10,7 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024; impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - _stack: usize, - _name: Option<&str>, - _p: Box, - ) -> io::Result { + pub unsafe fn new(_stack: usize, _init: Box) -> io::Result { Err(io::Error::UNSUPPORTED_PLATFORM) } diff --git a/library/std/src/sys/thread/wasip1.rs b/library/std/src/sys/thread/wasip1.rs index 83001fad49c81..6932691b74390 100644 --- a/library/std/src/sys/thread/wasip1.rs +++ b/library/std/src/sys/thread/wasip1.rs @@ -7,6 +7,7 @@ use crate::mem; use crate::num::NonZero; #[cfg(target_feature = "atomics")] use crate::sys::os; +use crate::thread::ThreadInit; use crate::time::Duration; #[cfg(target_feature = "atomics")] use crate::{cmp, ptr}; @@ -73,12 +74,8 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024; #[cfg(target_feature = "atomics")] impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); let mut native: libc::pthread_t = unsafe { mem::zeroed() }; let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() }; assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0); @@ -100,28 +97,28 @@ impl Thread { } }; - let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) }; - // Note: if the thread creation fails and this assert fails, then p will + let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) }; + // Note: if the thread creation fails and this assert fails, then data will // be leaked. However, an alternative design could cause double-free // which is clearly worse. assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0); return if ret != 0 { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. unsafe { - drop(Box::from_raw(p)); + drop(Box::from_raw(data)); } Err(io::Error::from_raw_os_error(ret)) } else { Ok(Thread { id: native }) }; - extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { - unsafe { - // Finally, let's run some code. - Box::from_raw(main as *mut Box)(); - } + extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void { + // SAFETY: we are simply recreating the box that was leaked earlier. 
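The recurring "leaked, therefore safe to reconstruct" comment is worth spelling out. A standalone model of the pthread-style failure path (nothing is actually spawned; `succeeded` stands in for the `pthread_create` return-code check):

```rust
fn spawn_model(succeeded: bool) {
    let data: *mut Vec<u8> = Box::into_raw(Box::new(vec![1u8, 2, 3]));
    if !succeeded {
        // The spawn primitive never consumed `data`, so reclaiming it here
        // is sound. Doing the same on success would race with the new
        // thread's `Box::from_raw` and could double-free.
        drop(unsafe { Box::from_raw(data) });
    }
    // On success, the new thread's entry point is the sole owner of `data`.
}

fn main() {
    spawn_model(false);
}
```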
+ let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + rust_start(); ptr::null_mut() } } diff --git a/library/std/src/sys/thread/windows.rs b/library/std/src/sys/thread/windows.rs index a5640c51c4a5d..1ef496a20cfe4 100644 --- a/library/std/src/sys/thread/windows.rs +++ b/library/std/src/sys/thread/windows.rs @@ -8,6 +8,7 @@ use crate::sys::pal::time::WaitableTimer; use crate::sys::pal::{dur2timeout, to_u16s}; use crate::sys::{c, stack_overflow}; use crate::sys_common::FromInner; +use crate::thread::ThreadInit; use crate::time::Duration; use crate::{io, ptr}; @@ -20,23 +21,19 @@ pub struct Thread { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces - pub unsafe fn new( - stack: usize, - _name: Option<&str>, - p: Box, - ) -> io::Result { - let p = Box::into_raw(Box::new(p)); + pub unsafe fn new(stack: usize, init: Box) -> io::Result { + let data = Box::into_raw(init); // CreateThread rounds up values for the stack size to the nearest page size (at least 4kb). // If a value of zero is given then the default stack size is used instead. // SAFETY: `thread_start` has the right ABI for a thread's entry point. - // `p` is simply passed through to the new thread without being touched. + // `data` is simply passed through to the new thread without being touched. let ret = unsafe { let ret = c::CreateThread( ptr::null_mut(), stack, Some(thread_start), - p as *mut _, + data as *mut _, c::STACK_SIZE_PARAM_IS_A_RESERVATION, ptr::null_mut(), ); @@ -45,19 +42,21 @@ impl Thread { return if let Ok(handle) = ret.try_into() { Ok(Thread { handle: Handle::from_inner(handle) }) } else { - // The thread failed to start and as a result p was not consumed. Therefore, it is + // The thread failed to start and as a result data was not consumed. Therefore, it is // safe to reconstruct the box so that it gets deallocated. - unsafe { drop(Box::from_raw(p)) }; + unsafe { drop(Box::from_raw(data)) }; Err(io::Error::last_os_error()) }; - unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 { - // Next, reserve some stack space for if we otherwise run out of stack. + unsafe extern "system" fn thread_start(data: *mut c_void) -> u32 { + // SAFETY: we are simply recreating the box that was leaked earlier. + let init = unsafe { Box::from_raw(data as *mut ThreadInit) }; + let rust_start = init.init(); + + // Reserve some stack space for if we otherwise run out of stack. stack_overflow::reserve_stack(); - // Finally, let's run some code. - // SAFETY: We are simply recreating the box that was leaked earlier. - // It's the responsibility of the one who call `Thread::new` to ensure this is safe to call here. 
-        unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)() };
+
+        rust_start();
         0
     }
 }
diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs
index 133e15a0928c6..6c2cdfa4acddf 100644
--- a/library/std/src/sys/thread/xous.rs
+++ b/library/std/src/sys/thread/xous.rs
@@ -7,6 +7,7 @@ use crate::os::xous::ffi::{
     map_memory, update_memory_flags,
 };
 use crate::os::xous::services::{TicktimerScalar, ticktimer_server};
+use crate::thread::ThreadInit;
 use crate::time::Duration;
 
 pub struct Thread {
@@ -19,12 +20,8 @@ pub const GUARD_PAGE_SIZE: usize = 4096;
 
 impl Thread {
     // unsafe: see thread::Builder::spawn_unchecked for safety requirements
-    pub unsafe fn new(
-        stack: usize,
-        _name: Option<&str>,
-        p: Box<dyn FnOnce()>,
-    ) -> io::Result<Thread> {
-        let p = Box::into_raw(Box::new(p));
+    pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
+        let data = Box::into_raw(init);
         let mut stack_size = crate::cmp::max(stack, MIN_STACK_SIZE);
 
         if (stack_size & 4095) != 0 {
@@ -65,22 +62,32 @@ impl Thread {
         let tid = create_thread(
             thread_start as *mut usize,
             &mut stack_plus_guard_pages[GUARD_PAGE_SIZE..(stack_size + GUARD_PAGE_SIZE)],
-            p as usize,
+            data as usize,
             guard_page_pre,
             stack_size,
             0,
         )
         .map_err(|code| io::Error::from_raw_os_error(code as i32))?;
 
+        #[inline(never)]
+        fn run_main_thread_not_inlined(init: Box<ThreadInit>) {
+            let rust_start = init.init();
+            rust_start();
+        }
+
         extern "C" fn thread_start(
-            main: *mut usize,
+            data: *mut usize,
             guard_page_pre: usize,
             stack_size: usize,
         ) -> ! {
-            unsafe {
-                // Run the contents of the new thread.
-                Box::from_raw(main as *mut Box<dyn FnOnce()>)();
-            }
+            // SAFETY: we are simply recreating the box that was leaked earlier.
+            let init = unsafe { Box::from_raw(data as *mut ThreadInit) };
+
+            // Run the main thread with an inline(never) barrier to prevent
+            // dealloc calls from being reordered to after the TLS has been destroyed.
+            // See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950
+            // for more context.
+            run_main_thread_not_inlined(init);
 
             // Destroy TLS, which will free the TLS page and call the destructor for
             // any thread local storage (if any).
diff --git a/library/std/src/sys/thread_local/destructors/list.rs b/library/std/src/sys/thread_local/destructors/list.rs
index b9d5214c438d2..44e00c8a5ae59 100644
--- a/library/std/src/sys/thread_local/destructors/list.rs
+++ b/library/std/src/sys/thread_local/destructors/list.rs
@@ -1,19 +1,16 @@
+use crate::alloc::System;
 use crate::cell::RefCell;
 use crate::sys::thread_local::guard;
 
 #[thread_local]
-static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
+static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8)), System>> =
+    RefCell::new(Vec::new_in(System));
 
 pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
     let Ok(mut dtors) = DTORS.try_borrow_mut() else {
-        // This point can only be reached if the global allocator calls this
-        // function again.
-        // FIXME: maybe use the system allocator instead?
-        rtabort!("the global allocator may not use TLS with destructors");
+        rtabort!("the System allocator may not use TLS with destructors")
     };
-    guard::enable();
-
     dtors.push((t, dtor));
 }
 
@@ -36,7 +33,7 @@ pub unsafe fn run() {
             }
             None => {
                 // Free the list memory.
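The xous barrier deserves a self-contained illustration (invented names; a sketch, not the std code). The principle: if the entry closure ran inline in `thread_start`, the optimizer could sink its `dealloc` calls past the TLS teardown that follows; a non-inlined call frame keeps every free owed by the closure ordered before the teardown:

```rust
#[inline(never)]
fn run_user_code(entry: Box<dyn FnOnce()>) {
    entry();
    // Every deallocation owed by `entry` has completed by the time
    // this frame returns.
}

fn thread_entry(entry: Box<dyn FnOnce()>) {
    run_user_code(entry);
    // ... only now is it safe to destroy TLS ...
}

fn main() {
    thread_entry(Box::new(|| println!("hi")));
}
```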
- *dtors = Vec::new(); + *dtors = Vec::new_in(System); break; } } diff --git a/library/std/src/sys/thread_local/key/xous.rs b/library/std/src/sys/thread_local/key/xous.rs index a27cec5ca1a60..db83d2bf4a13a 100644 --- a/library/std/src/sys/thread_local/key/xous.rs +++ b/library/std/src/sys/thread_local/key/xous.rs @@ -38,6 +38,7 @@ use core::arch::asm; +use crate::alloc::System; use crate::mem::ManuallyDrop; use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory}; use crate::ptr; @@ -151,7 +152,10 @@ struct Node { } unsafe fn register_dtor(key: Key, dtor: Dtor) { - let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() })); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let mut node = + ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System)); #[allow(unused_unsafe)] let mut head = unsafe { DTORS.load(Acquire) }; @@ -182,6 +186,11 @@ pub unsafe fn destroy_tls() { }; } +// This is marked inline(never) to prevent dealloc calls from being reordered +// to after the TLS has been destroyed. +// See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950 +// for more context. +#[inline(never)] unsafe fn run_dtors() { let mut any_run = true; diff --git a/library/std/src/sys/thread_local/os.rs b/library/std/src/sys/thread_local/os.rs index 88bb5ae7c650d..d17c3c31beebd 100644 --- a/library/std/src/sys/thread_local/os.rs +++ b/library/std/src/sys/thread_local/os.rs @@ -1,6 +1,6 @@ use super::key::{Key, LazyKey, get, set}; use super::{abort_on_dtor_unwind, guard}; -use crate::alloc::{self, Layout}; +use crate::alloc::{self, GlobalAlloc, Layout, System}; use crate::cell::Cell; use crate::marker::PhantomData; use crate::mem::ManuallyDrop; @@ -113,17 +113,19 @@ pub const fn value_align() -> usize { crate::mem::align_of::>() } -/// Equivalent to `Box>`, but potentially over-aligned. -struct AlignedBox { +/// Equivalent to `Box, System>`, but potentially over-aligned. +struct AlignedSystemBox { ptr: NonNull>, } -impl AlignedBox { +impl AlignedSystemBox { #[inline] fn new(v: Value) -> Self { let layout = Layout::new::>().align_to(ALIGN).unwrap(); - let ptr: *mut Value = (unsafe { alloc::alloc(layout) }).cast(); + // We use the System allocator here to avoid interfering with a potential + // Global allocator using thread-local storage. + let ptr: *mut Value = (unsafe { System.alloc(layout) }).cast(); let Some(ptr) = NonNull::new(ptr) else { alloc::handle_alloc_error(layout); }; @@ -143,7 +145,7 @@ impl AlignedBox { } } -impl Deref for AlignedBox { +impl Deref for AlignedSystemBox { type Target = Value; #[inline] @@ -152,14 +154,14 @@ impl Deref for AlignedBox { } } -impl Drop for AlignedBox { +impl Drop for AlignedSystemBox { #[inline] fn drop(&mut self) { let layout = Layout::new::>().align_to(ALIGN).unwrap(); unsafe { let unwind_result = catch_unwind(AssertUnwindSafe(|| self.ptr.drop_in_place())); - alloc::dealloc(self.ptr.as_ptr().cast(), layout); + System.dealloc(self.ptr.as_ptr().cast(), layout); if let Err(payload) = unwind_result { resume_unwind(payload); } @@ -205,11 +207,11 @@ impl Storage { return ptr::null(); } - let value = AlignedBox::::new(Value { + let value = AlignedSystemBox::::new(Value { value: i.and_then(Option::take).unwrap_or_else(f), key, }); - let ptr = AlignedBox::into_raw(value); + let ptr = AlignedSystemBox::into_raw(value); // SAFETY: // * key came from a `LazyKey` and is thus correct. 
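Outside of std, the same trick of bypassing `#[global_allocator]` looks like this (requires the unstable `allocator_api` feature on nightly; `node` is an invented example):

```rust
#![feature(allocator_api)] // `Box::new_in` is unstable outside of std itself

use std::alloc::System;

fn main() {
    // Allocating bookkeeping nodes directly from `System` means the TLS
    // machinery never re-enters a user-provided `#[global_allocator]`:
    let node: Box<u32, System> = Box::new_in(42, System);
    println!("{node}");
}
```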
@@ -227,7 +229,7 @@ impl Storage { // initializer has already returned and the next scope only starts // after we return the pointer. Therefore, there can be no references // to the old value. - drop(unsafe { AlignedBox::::from_raw(old) }); + drop(unsafe { AlignedSystemBox::::from_raw(old) }); } // SAFETY: We just created this value above. @@ -246,7 +248,7 @@ unsafe extern "C" fn destroy_value(ptr: *mut u8) // Note that to prevent an infinite loop we reset it back to null right // before we return from the destructor ourselves. abort_on_dtor_unwind(|| { - let ptr = unsafe { AlignedBox::::from_raw(ptr as *mut Value) }; + let ptr = unsafe { AlignedSystemBox::::from_raw(ptr as *mut Value) }; let key = ptr.key; // SAFETY: `key` is the TLS key `ptr` was stored under. unsafe { set(key, ptr::without_provenance_mut(1)) }; diff --git a/library/std/src/thread/current.rs b/library/std/src/thread/current.rs index f00212bfcb617..ea0c6c7229fe8 100644 --- a/library/std/src/thread/current.rs +++ b/library/std/src/thread/current.rs @@ -269,22 +269,13 @@ fn init_current(current: *mut ()) -> Thread { // BUSY exists solely for this check, but as it is in the slow path, the // extra TLS write above shouldn't matter. The alternative is nearly always // a stack overflow. - - // If you came across this message, contact the author of your allocator. - // If you are said author: A surprising amount of functions inside the - // standard library (e.g. `Mutex`, `thread_local!`, `File` when using long - // paths, even `panic!` when using unwinding), need memory allocation, so - // you'll get circular dependencies all over the place when using them. - // I (joboet) highly recommend using only APIs from core in your allocator - // and implementing your own system abstractions. Still, if you feel that - // a particular API should be entirely allocation-free, feel free to open - // an issue on the Rust repository, we'll see what we can do. + // + // If we reach this point it means our initialization routine ended up + // calling current() either directly, or indirectly through the global + // allocator, which is a bug either way as we may not call the global + // allocator in current(). rtabort!( - "\n\ - Attempted to access thread-local data while allocating said data.\n\ - Do not access functions that allocate in the global allocator!\n\ - This is a bug in the global allocator.\n\ - " + "init_current() was re-entrant, which indicates a bug in the Rust threading implementation" ) } else { debug_assert_eq!(current, DESTROYED); diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index 4259a4d1f3b7c..06e4b252fc8f3 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -24,12 +24,17 @@ use crate::fmt; /// [`with`]) within a thread, and values that implement [`Drop`] get /// destructed when a thread exits. Some platform-specific caveats apply, which /// are explained below. -/// Note that if the destructor panics, the whole process will be [aborted]. +/// Note that, should the destructor panic, the whole process will be [aborted]. +/// On platforms where initialization requires memory allocation, this is +/// performed directly through [`System`], allowing the [global allocator] +/// to make use of thread local storage. /// /// A `LocalKey`'s initializer cannot recursively depend on itself. Using a /// `LocalKey` in this way may cause panics, aborts, or infinite recursion on /// the first call to `with`. 
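Taken together, these changes legalize patterns like the following user-written allocator, which mirrors the new test at the end of this PR (`PerThreadCounter` and the local's name are invented for the sketch):

```rust
use std::alloc::{GlobalAlloc, Layout, System};
use std::cell::Cell;

thread_local! {
    // The key's storage itself now comes from `System`, so the global
    // allocator below can use it without recursing into itself.
    static BYTES_ON_THIS_THREAD: Cell<usize> = Cell::new(0);
}

struct PerThreadCounter;

unsafe impl GlobalAlloc for PerThreadCounter {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // `try_with` instead of `with`: during thread teardown the local
        // may already be gone, and the allocator must tolerate that.
        let _ = BYTES_ON_THIS_THREAD.try_with(|b| b.set(b.get() + layout.size()));
        unsafe { System.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        unsafe { System.dealloc(ptr, layout) }
    }
}

#[global_allocator]
static ALLOC: PerThreadCounter = PerThreadCounter;

fn main() {
    let _v = vec![0u8; 128];
    BYTES_ON_THIS_THREAD.with(|b| println!("allocated {} bytes here", b.get()));
}
```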
 ///
+/// [`System`]: crate::alloc::System
+/// [global allocator]: crate::alloc
 /// [aborted]: crate::process::abort
 ///
 /// # Single-thread Synchronization
diff --git a/library/std/src/thread/mod.rs b/library/std/src/thread/mod.rs
index fd7cce3f97db8..99a2214466a5c 100644
--- a/library/std/src/thread/mod.rs
+++ b/library/std/src/thread/mod.rs
@@ -158,6 +158,7 @@
 #[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))]
 mod tests;
 
+use crate::alloc::System;
 use crate::any::Any;
 use crate::cell::UnsafeCell;
 use crate::ffi::CStr;
@@ -211,6 +212,36 @@ pub mod local_impl {
     pub use crate::sys::thread_local::*;
 }
 
+/// The data passed to the spawned thread for thread initialization. Any
+/// thread implementation should start a new thread by calling `.init()` on
+/// this value before doing anything else, ensuring that the current thread
+/// is properly initialized and that the global allocator works.
+pub(crate) struct ThreadInit {
+    pub handle: Thread,
+    pub rust_start: Box<dyn FnOnce() + Send + 'static>,
+}
+
+impl ThreadInit {
+    /// Initialize the 'current thread' mechanism on this thread, returning the
+    /// Rust entry point.
+    pub fn init(self: Box<Self>) -> Box<dyn FnOnce() + Send + 'static> {
+        // Set the current thread before any (de)allocations on the global allocator occur,
+        // so that it may call std::thread::current() in its implementation. This is also
+        // why we take `Box<Self>`, to ensure the Box is not destroyed until after this point.
+        // Cloning the handle does not invoke the global allocator, it is an Arc.
+        if let Err(_thread) = set_current(self.handle.clone()) {
+            // The current thread should not have been set yet. Use an abort to save binary size (see #123356).
+            rtabort!("current thread handle already set during thread spawn");
+        }
+
+        if let Some(name) = self.handle.cname() {
+            imp::set_name(name);
+        }
+
+        self.rust_start
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Builder
 ////////////////////////////////////////////////////////////////////////////////
@@ -502,16 +533,14 @@ impl Builder {
         });
 
         let id = ThreadId::new();
-        let my_thread = Thread::new(id, name);
+        let thread = Thread::new(id, name);
 
         let hooks = if no_hooks {
             spawnhook::ChildSpawnHooks::default()
         } else {
-            spawnhook::run_spawn_hooks(&my_thread)
+            spawnhook::run_spawn_hooks(&thread)
         };
 
-        let their_thread = my_thread.clone();
-
         let my_packet: Arc<Packet<'scope, T>> = Arc::new(Packet {
             scope: scope_data,
             result: UnsafeCell::new(None),
@@ -543,19 +572,10 @@
         }
 
         let f = MaybeDangling::new(f);
-        let main = move || {
-            if let Err(_thread) = set_current(their_thread.clone()) {
-                // Both the current thread handle and the ID should not be
-                // initialized yet. Since only the C runtime and some of our
-                // platform code run before this, this point shouldn't be
-                // reachable. Use an abort to save binary size (see #123356).
-                rtabort!("something here is badly broken!");
-            }
-
-            if let Some(name) = their_thread.cname() {
-                imp::set_name(name);
-            }
+        // The entrypoint of the Rust thread, after platform-specific thread
+        // initialization is done.
+        let rust_start = move || {
             let f = f.into_inner();
             let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
                 crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run());
@@ -578,11 +598,15 @@
             scope_data.increment_num_running_threads();
         }
 
-        let main = Box::new(main);
         // SAFETY: dynamic size and alignment of the Box remain the same. See below for why the
         // lifetime change is justified.
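The claim above that cloning the handle does not invoke the global allocator follows from `Thread` wrapping a `Pin<Arc<Inner, System>>`: cloning an `Arc` only bumps a reference count. A tiny illustration of the underlying fact:

```rust
use std::sync::Arc;

fn main() {
    let handle = Arc::new(42);
    let clone = Arc::clone(&handle); // refcount increment, no allocation
    assert_eq!(*clone, 42);
}
```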
- let main = - unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + Send + 'static)) }; + let rust_start = unsafe { + Box::from_raw( + Box::into_raw(Box::new(rust_start)) as *mut (dyn FnOnce() + Send + 'static) + ) + }; + + let init = Box::new(ThreadInit { handle: thread.clone(), rust_start }); Ok(JoinInner { // SAFETY: @@ -598,8 +622,8 @@ impl Builder { // Similarly, the `sys` implementation must guarantee that no references to the closure // exist after the thread has terminated, which is signaled by `Thread::join` // returning. - native: unsafe { imp::Thread::new(stack_size, my_thread.name(), main)? }, - thread: my_thread, + native: unsafe { imp::Thread::new(stack_size, init)? }, + thread, packet: my_packet, }) } @@ -1258,21 +1282,41 @@ impl ThreadId { } } _ => { - use crate::sync::{Mutex, PoisonError}; - - static COUNTER: Mutex = Mutex::new(0); - - let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner); - let Some(id) = counter.checked_add(1) else { - // in case the panic handler ends up calling `ThreadId::new()`, - // avoid reentrant lock acquire. - drop(counter); - exhausted(); - }; + use crate::cell::SyncUnsafeCell; + use crate::hint::spin_loop; + use crate::sync::atomic::{Atomic, AtomicBool}; + use crate::thread::yield_now; + + // If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex + // here as we might be trying to get the current thread id in the global allocator, + // and on some platforms Mutex requires allocation. + static COUNTER_LOCKED: Atomic = AtomicBool::new(false); + static COUNTER: SyncUnsafeCell = SyncUnsafeCell::new(0); + + // Acquire lock. + let mut spin = 0; + while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() { + if spin <= 3 { + for _ in 0..(1 << spin) { + spin_loop(); + } + } else { + yield_now(); + } + spin += 1; + } - *counter = id; - drop(counter); - ThreadId(NonZero::new(id).unwrap()) + // SAFETY: we have an exclusive lock on the counter. + unsafe { + if let Some(id) = (*COUNTER.get()).checked_add(1) { + *COUNTER.get() = id; + COUNTER_LOCKED.store(false, Ordering::Release); + ThreadId(NonZero::new(id).unwrap()) + } else { + COUNTER_LOCKED.store(false, Ordering::Release); + exhausted() + } + } } } } @@ -1466,7 +1510,10 @@ impl Inner { /// /// [`thread::current`]: current::current pub struct Thread { - inner: Pin>, + // We use the System allocator such that creating or dropping this handle + // does not interfere with a potential Global allocator using thread-local + // storage. + inner: Pin>, } impl Thread { @@ -1479,7 +1526,7 @@ impl Thread { // SAFETY: We pin the Arc immediately after creation, so its address never // changes. let inner = unsafe { - let mut arc = Arc::::new_uninit(); + let mut arc = Arc::::new_uninit_in(System); let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr(); (&raw mut (*ptr).name).write(name); (&raw mut (*ptr).id).write(id); @@ -1650,7 +1697,7 @@ impl Thread { pub fn into_raw(self) -> *const () { // Safety: We only expose an opaque pointer, which maintains the `Pin` invariant. let inner = unsafe { Pin::into_inner_unchecked(self.inner) }; - Arc::into_raw(inner) as *const () + Arc::into_raw_with_allocator(inner).0 as *const () } /// Constructs a `Thread` from a raw pointer. @@ -1672,10 +1719,12 @@ impl Thread { #[unstable(feature = "thread_raw", issue = "97523")] pub unsafe fn from_raw(ptr: *const ()) -> Thread { // Safety: Upheld by caller. 
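As a standalone model of the new `ThreadId` fallback path (invented function names, same backoff constants): spin with exponential backoff for a few rounds, then yield to the scheduler. Unlike `Mutex`, nothing here can allocate, which is why it is safe to reach from inside a global allocator:

```rust
use std::sync::atomic::{AtomicBool, Ordering};

fn lock(flag: &AtomicBool) {
    let mut spin = 0;
    while flag
        .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed)
        .is_err()
    {
        if spin <= 3 {
            for _ in 0..(1 << spin) {
                std::hint::spin_loop();
            }
        } else {
            std::thread::yield_now();
        }
        spin += 1;
    }
}

fn unlock(flag: &AtomicBool) {
    flag.store(false, Ordering::Release);
}

fn main() {
    let flag = AtomicBool::new(false);
    lock(&flag);
    unlock(&flag);
}
```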
-        unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } }
+        unsafe {
+            Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) }
+        }
     }
 
-    fn cname(&self) -> Option<&CStr> {
+    pub(crate) fn cname(&self) -> Option<&CStr> {
         if let Some(name) = &self.inner.name {
             Some(name.as_cstr())
         } else if main_thread::get() == Some(self.inner.id) {
diff --git a/tests/ui/threads-sendsync/tls-in-global-alloc.rs b/tests/ui/threads-sendsync/tls-in-global-alloc.rs
new file mode 100644
index 0000000000000..424e43ebccca0
--- /dev/null
+++ b/tests/ui/threads-sendsync/tls-in-global-alloc.rs
@@ -0,0 +1,107 @@
+//@ run-pass
+//@ needs-threads
+
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::hint::black_box;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::thread::{Thread, ThreadId};
+
+static GLOBAL: AtomicUsize = AtomicUsize::new(0);
+static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false);
+static LOCAL_TRY_WITH_SUCCEEDED_ALLOC: AtomicBool = AtomicBool::new(false);
+static LOCAL_TRY_WITH_SUCCEEDED_DEALLOC: AtomicBool = AtomicBool::new(false);
+
+struct LocalForAllocatorWithoutDrop(ThreadId);
+
+struct LocalForAllocatorWithDrop(Thread);
+
+impl Drop for LocalForAllocatorWithDrop {
+    fn drop(&mut self) {
+        GLOBAL.fetch_or(2, Ordering::Relaxed);
+    }
+}
+
+struct LocalForUser(u32);
+
+impl Drop for LocalForUser {
+    // A user might call the global allocator in a thread-local drop.
+    fn drop(&mut self) {
+        self.0 += 1;
+        drop(black_box(Box::new(self.0)))
+    }
+}
+
+thread_local! {
+    static LOCAL_FOR_USER0: LocalForUser = LocalForUser(0);
+    static LOCAL_FOR_ALLOCATOR_WITHOUT_DROP: LocalForAllocatorWithoutDrop = {
+        LocalForAllocatorWithoutDrop(std::thread::current().id())
+    };
+    static LOCAL_FOR_ALLOCATOR_WITH_DROP: LocalForAllocatorWithDrop = {
+        GLOBAL.fetch_or(1, Ordering::Relaxed);
+        LocalForAllocatorWithDrop(std::thread::current())
+    };
+    static LOCAL_FOR_USER1: LocalForUser = LocalForUser(1);
+}
+
+#[global_allocator]
+static ALLOC: Alloc = Alloc;
+struct Alloc;
+
+unsafe impl GlobalAlloc for Alloc {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        // Make sure we aren't re-entrant.
+        assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
+        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
+
+        // Should be infallible.
+        LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| {
+            assert!(local.0 == std::thread::current().id());
+        });
+
+        // May fail once thread-local destructors start running, and ours has
+        // been run.
+        let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| {
+            assert!(local.0.id() == std::thread::current().id());
+        });
+        LOCAL_TRY_WITH_SUCCEEDED_ALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed);
+
+        let ret = unsafe { System.alloc(layout) };
+        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
+        ret
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        // Make sure we aren't re-entrant.
+        assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
+        SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
+
+        // Should be infallible.
+        LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| {
+            assert!(local.0 == std::thread::current().id());
+        });
+
+        // May fail once thread-local destructors start running, and ours has
+        // been run.
+ let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| { + assert!(local.0.id() == std::thread::current().id()); + }); + LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed); + + unsafe { System.dealloc(ptr, layout) } + SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed); + } +} + +fn main() { + std::thread::spawn(|| { + LOCAL_FOR_USER0.with(|l| assert!(l.0 == 0)); + std::hint::black_box(vec![1, 2]); + assert!(GLOBAL.load(Ordering::Relaxed) == 1); + LOCAL_FOR_USER1.with(|l| assert!(l.0 == 1)); + }) + .join() + .unwrap(); + assert!(GLOBAL.load(Ordering::Relaxed) == 3); + assert!(LOCAL_TRY_WITH_SUCCEEDED_ALLOC.load(Ordering::Relaxed)); + assert!(LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.load(Ordering::Relaxed)); +}