From 175ae9a1e62fd06977a4646d46aaca1b9fa526e3 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Sun, 30 Mar 2025 12:15:40 -0400 Subject: [PATCH 1/2] [libc] ensure tls dtors are called in main thread --- libc/src/__support/threads/thread.cpp | 5 ++++ libc/src/__support/threads/thread.h | 6 ++-- libc/src/stdlib/exit.cpp | 4 +++ .../src/__support/threads/CMakeLists.txt | 21 +++++++++++++ .../__support/threads/double_exit_test.cpp | 23 ++++++++++++++ .../src/__support/threads/main_exit_test.cpp | 30 +++++++++++++++++++ 6 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 libc/test/integration/src/__support/threads/double_exit_test.cpp create mode 100644 libc/test/integration/src/__support/threads/main_exit_test.cpp diff --git a/libc/src/__support/threads/thread.cpp b/libc/src/__support/threads/thread.cpp index 6f6b75be5766d..c7135596622c6 100644 --- a/libc/src/__support/threads/thread.cpp +++ b/libc/src/__support/threads/thread.cpp @@ -154,6 +154,9 @@ ThreadAtExitCallbackMgr *get_thread_atexit_callback_mgr() { } void call_atexit_callbacks(ThreadAttributes *attrib) { + if (attrib->dtors_called) + return; + attrib->dtors_called = true; attrib->atexit_callback_mgr->call(); for (size_t i = 0; i < TSS_KEY_COUNT; ++i) { TSSValueUnit &unit = tss_values[i]; @@ -163,6 +166,8 @@ void call_atexit_callbacks(ThreadAttributes *attrib) { } } +extern "C" void __cxa_thread_finalize() { call_atexit_callbacks(self.attrib); } + } // namespace internal cpp::optional new_tss_key(TSSDtor *dtor) { diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h index f2b1f6bbb253d..f7710fde2c70d 100644 --- a/libc/src/__support/threads/thread.h +++ b/libc/src/__support/threads/thread.h @@ -109,12 +109,14 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes { ThreadReturnValue retval; ThreadAtExitCallbackMgr *atexit_callback_mgr; void *platform_data; + bool dtors_called; - constexpr ThreadAttributes() + LIBC_INLINE constexpr ThreadAttributes() : detach_state(uint32_t(DetachState::DETACHED)), stack(nullptr), stacksize(0), guardsize(0), tls(0), tls_size(0), owned_stack(false), tid(-1), style(ThreadStyle::POSIX), retval(), - atexit_callback_mgr(nullptr), platform_data(nullptr) {} + atexit_callback_mgr(nullptr), platform_data(nullptr), + dtors_called(false) {} }; using TSSDtor = void(void *); diff --git a/libc/src/stdlib/exit.cpp b/libc/src/stdlib/exit.cpp index 28a6f8a63c0c6..097a52339e5e8 100644 --- a/libc/src/stdlib/exit.cpp +++ b/libc/src/stdlib/exit.cpp @@ -14,8 +14,12 @@ namespace LIBC_NAMESPACE_DECL { extern "C" void __cxa_finalize(void *); +extern "C" [[gnu::weak]] void __cxa_thread_finalize(); +// TODO: use recursive mutex to protect this routine. [[noreturn]] LLVM_LIBC_FUNCTION(void, exit, (int status)) { + if (__cxa_thread_finalize) + __cxa_thread_finalize(); __cxa_finalize(nullptr); internal::exit(status); } diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt index 5a12d28ada3fd..40e96681b1207 100644 --- a/libc/test/integration/src/__support/threads/CMakeLists.txt +++ b/libc/test/integration/src/__support/threads/CMakeLists.txt @@ -25,3 +25,24 @@ add_integration_test( DEPENDS libc.src.__support.threads.thread ) + +add_integration_test( + main_exit_test + SUITE + libc-support-threads-integration-tests + SRCS + main_exit_test.cpp + DEPENDS + libc.src.__support.threads.thread +) + +add_integration_test( + double_exit_test + SUITE + libc-support-threads-integration-tests + SRCS + double_exit_test.cpp + DEPENDS + libc.src.__support.threads.thread + libc.src.stdlib.exit +) diff --git a/libc/test/integration/src/__support/threads/double_exit_test.cpp b/libc/test/integration/src/__support/threads/double_exit_test.cpp new file mode 100644 index 0000000000000..e4a163644a970 --- /dev/null +++ b/libc/test/integration/src/__support/threads/double_exit_test.cpp @@ -0,0 +1,23 @@ +//===-- Test handling of thread local data --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "src/stdlib/exit.h" +#include "test/IntegrationTest/test.h" + +extern "C" { +[[gnu::weak]] +void *__dso_handle = nullptr; +int __cxa_thread_atexit_impl(void (*func)(void *), void *arg, void *dso); +} + +TEST_MAIN() { + __cxa_thread_atexit_impl([](void *) { LIBC_NAMESPACE::exit(0); }, nullptr, + __dso_handle); + return 0; +} diff --git a/libc/test/integration/src/__support/threads/main_exit_test.cpp b/libc/test/integration/src/__support/threads/main_exit_test.cpp new file mode 100644 index 0000000000000..c90e4e569cfba --- /dev/null +++ b/libc/test/integration/src/__support/threads/main_exit_test.cpp @@ -0,0 +1,30 @@ +//===-- Test handling of thread local data --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "test/IntegrationTest/test.h" + +bool called = false; + +extern "C" { +[[gnu::weak]] +void *__dso_handle = nullptr; +int __cxa_thread_atexit_impl(void (*func)(void *), void *arg, void *dso); +} + +[[gnu::destructor]] +void destructor() { + if (!called) + __builtin_trap(); +} + +TEST_MAIN() { + __cxa_thread_atexit_impl([](void *) { called = true; }, nullptr, + __dso_handle); + return 0; +} From 1afb0e07d7e206ebf6ca6ad730820d742af0c2d1 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Mon, 31 Mar 2025 10:42:11 -0400 Subject: [PATCH 2/2] [libc] add _malloc_thread_cleanup option --- libc/config/config.json | 4 ++ libc/docs/configure.rst | 1 + .../__support/threads/linux/CMakeLists.txt | 7 +++ libc/src/__support/threads/linux/thread.cpp | 9 ++++ libc/src/stdlib/CMakeLists.txt | 8 ++++ libc/src/stdlib/atexit.cpp | 8 ++++ .../src/__support/threads/CMakeLists.txt | 15 ++++++ .../__support/threads/malloc_cleanup_test.cpp | 47 +++++++++++++++++++ 8 files changed, 99 insertions(+) create mode 100644 libc/test/integration/src/__support/threads/malloc_cleanup_test.cpp diff --git a/libc/config/config.json b/libc/config/config.json index d738aade74427..b2688f1b29309 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -89,6 +89,10 @@ "LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT": { "value": 100, "doc": "Default number of spins before blocking if a rwlock is in contention (default to 100)." + }, + "LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP": { + "value": false, + "doc": "Enable the `_malloc_thread_cleanup` weak symbol. When defined, this is function is called after `__cxa` and pthread-specific dtors. On main thread, this will be called after `atexit` functions and `.fini` dtors, right before TLS tearing down. This function can be overridden by allocators to perform cleanup. Allocators can use this symbol to avoid registering thread dtors using potentially reentrant routines." } }, "math": { diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index dee9a63101eb9..182d373c075f6 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -47,6 +47,7 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance. - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the output mechanism to reduce code size. * **"pthread" options** + - ``LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP``: Enable the `_malloc_thread_cleanup` weak symbol. When defined, this is function is called after `__cxa` and pthread-specific dtors. On main thread, this will be called after `atexit` functions and `.fini` dtors, right before TLS tearing down. This function can be overridden by allocators to perform cleanup. Allocators can use this symbol to avoid registering thread dtors using potentially reentrant routines. - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100). - ``LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a rwlock is in contention (default to 100). - ``LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY``: Automatically adjust timeout to CLOCK_MONOTONIC (default to true). POSIX API may require CLOCK_REALTIME, which can be unstable and leading to unexpected behavior. This option will convert the real-time timestamp to monotonic timestamp relative to the time of call. diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 364e7e2b90585..3e7c16afe0f6e 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -71,6 +71,12 @@ add_header_library( libc.src.__support.threads.mutex_common ) +if (LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP) + set(malloc_cleanup_flags -DLIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP) +else() + set(malloc_cleanup_flags) +endif() + add_object_library( thread SRCS @@ -89,6 +95,7 @@ add_object_library( libc.src.__support.threads.thread_common COMPILE_OPTIONS ${libc_opt_high_flag} + ${malloc_cleanup_flags} -fno-omit-frame-pointer # This allows us to sniff out the thread args from # the new thread's stack reliably. -Wno-frame-address # Yes, calling __builtin_return_address with a diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index c531d74c53355..2d6d4e517064d 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -482,6 +482,10 @@ int Thread::get_name(cpp::StringStream &name) const { return 0; } +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP +extern "C" [[gnu::weak]] void _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + void thread_exit(ThreadReturnValue retval, ThreadStyle style) { auto attrib = self.attrib; @@ -494,6 +498,11 @@ void thread_exit(ThreadReturnValue retval, ThreadStyle style) { // different thread. The destructors of thread local and TSS objects should // be called by the thread which owns them. internal::call_atexit_callbacks(attrib); +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + // call _malloc_thread_cleanup after the atexit callbacks + if (_malloc_thread_cleanup) + _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP uint32_t joinable_state = uint32_t(DetachState::JOINABLE); if (!attrib->detach_state.compare_exchange_strong( diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 74ae864f72e23..7dd0c969cf9b2 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -589,6 +589,12 @@ add_header_library( ) endif() +if (LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP) + set(malloc_cleanup_flags -DLIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP) +else() + set(malloc_cleanup_flags) +endif() + add_entrypoint_object( atexit SRCS @@ -599,6 +605,8 @@ add_entrypoint_object( 20 # For constinit DEPENDS .exit_handler + COMPILE_OPTIONS + ${malloc_cleanup_flags} ) add_entrypoint_object( diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp index 799aad136bda5..b75c0e40c70d8 100644 --- a/libc/src/stdlib/atexit.cpp +++ b/libc/src/stdlib/atexit.cpp @@ -24,9 +24,17 @@ int __cxa_atexit(AtExitCallback *callback, void *payload, void *) { return add_atexit_unit(atexit_callbacks, {callback, payload}); } +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP +extern "C" [[gnu::weak]] void _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + void __cxa_finalize(void *dso) { if (!dso) { call_exit_callbacks(atexit_callbacks); +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + if (_malloc_thread_cleanup) + _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP if (teardown_main_tls) teardown_main_tls(); } diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt index 40e96681b1207..8e8ba92378566 100644 --- a/libc/test/integration/src/__support/threads/CMakeLists.txt +++ b/libc/test/integration/src/__support/threads/CMakeLists.txt @@ -36,6 +36,21 @@ add_integration_test( libc.src.__support.threads.thread ) +if (LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP) + add_integration_test( + malloc_cleanup_test + SUITE + libc-support-threads-integration-tests + SRCS + malloc_cleanup_test.cpp + DEPENDS + libc.src.__support.threads.thread + libc.src.stdlib.atexit + libc.src.stdlib.exit + libc.src.stdlib._Exit + ) +endif() + add_integration_test( double_exit_test SUITE diff --git a/libc/test/integration/src/__support/threads/malloc_cleanup_test.cpp b/libc/test/integration/src/__support/threads/malloc_cleanup_test.cpp new file mode 100644 index 0000000000000..a11c3926a6ca0 --- /dev/null +++ b/libc/test/integration/src/__support/threads/malloc_cleanup_test.cpp @@ -0,0 +1,47 @@ +//===-- Test handling of malloc TLS data ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "src/stdlib/_Exit.h" +#include "src/stdlib/atexit.h" +#include "src/stdlib/exit.h" +#include "test/IntegrationTest/test.h" + +bool cxa_dtor_called = false; +bool fini_dtor_called = false; +bool atexit_called = false; +volatile thread_local int tls_accessible = 0; + +extern "C" { +[[gnu::weak]] +void *__dso_handle = nullptr; +int __cxa_thread_atexit_impl(void (*func)(void *), void *arg, void *dso); +void _malloc_thread_cleanup() { + // make sure that TLS is still alive + tls_accessible = 1; + if (!cxa_dtor_called) + __builtin_trap(); + if (!fini_dtor_called) + __builtin_trap(); + if (!atexit_called) + __builtin_trap(); + LIBC_NAMESPACE::_Exit(0); +} +} + +[[gnu::destructor]] +void destructor() { + fini_dtor_called = true; +} + +TEST_MAIN() { + __cxa_thread_atexit_impl([](void *) { cxa_dtor_called = true; }, nullptr, + __dso_handle); + LIBC_NAMESPACE::atexit([]() { atexit_called = true; }); + LIBC_NAMESPACE::exit(1); +}