From d4df77aa6c1d9abbc94709fe1507b74239c7d704 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 26 Feb 2024 11:15:48 -0600 Subject: [PATCH] [libc] Implement 'atexit' on the GPU correctly Summary: This function was never marked at supported because it was fundamentally broken when called with multiple threads. The patch in https://github.com/llvm/llvm-project/pull/83026 introduces a lock-free stack that can be used to correctly handle enqueuing callbacks from multiple threads. Although the previous interface tried to provide a consistent API, this was not feasible with the needs for a lock-free stack so I have elected to just use ifdefs. The size is fixed to whatever we use for testing, which currently amounts to about 8KiB dedicated for this thing, which isn't enough to be concenred about. Depends on https://github.com/llvm/llvm-project/pull/83026 --- libc/docs/gpu/support.rst | 1 + libc/src/stdlib/CMakeLists.txt | 37 +++++++++------ libc/src/stdlib/atexit.cpp | 9 +--- libc/src/stdlib/gpu/CMakeLists.txt | 13 ++++++ libc/src/stdlib/gpu/atexit.cpp | 63 ++++++++++++++++++++++++++ libc/startup/gpu/amdgpu/CMakeLists.txt | 1 + libc/startup/gpu/nvptx/CMakeLists.txt | 1 + 7 files changed, 103 insertions(+), 22 deletions(-) create mode 100644 libc/src/stdlib/gpu/atexit.cpp diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index 250f0a7794de3..b2231eb69a590 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -102,6 +102,7 @@ atol |check| atoll |check| exit |check| |check| abort |check| |check| +atexit |check| labs |check| llabs |check| div |check| diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index bd0bcffe0045d..4fc979a5927ad 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -363,20 +363,22 @@ add_entrypoint_object( libc.src.__support.OSUtil.osutil ) -add_entrypoint_object( - atexit - SRCS - atexit.cpp - HDRS - atexit.h - CXX_STANDARD - 20 # For constinit of the atexit callback list. - DEPENDS - libc.src.__support.fixedvector - libc.src.__support.blockstore - libc.src.__support.threads.mutex - libc.src.__support.CPP.new -) +if(NOT LIBC_TARGET_OS_IS_GPU) + add_entrypoint_object( + atexit + SRCS + atexit.cpp + HDRS + atexit.h + CXX_STANDARD + 20 # For constinit of the atexit callback list. + DEPENDS + libc.src.__support.fixedvector + libc.src.__support.blockstore + libc.src.__support.threads.mutex + libc.src.__support.CPP.new + ) +endif() add_entrypoint_object( exit @@ -398,6 +400,13 @@ add_entrypoint_object( ) if(LIBC_TARGET_OS_IS_GPU) + add_entrypoint_object( + atexit + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.atexit + ) + add_entrypoint_object( malloc ALIAS diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp index 1513b7969f0db..3e4498f31ddfe 100644 --- a/libc/src/stdlib/atexit.cpp +++ b/libc/src/stdlib/atexit.cpp @@ -28,14 +28,7 @@ struct AtExitUnit { constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {} }; -#if defined(LIBC_TARGET_ARCH_IS_GPU) -// The GPU build cannot handle the potentially recursive definitions required by -// the BlockStore class. Additionally, the liklihood that someone exceeds this -// while executing on the GPU is extremely small. -// FIXME: It is not generally safe to use 'atexit' on the GPU because the -// mutexes simply passthrough. We will need a lock free stack. -using ExitCallbackList = FixedVector; -#elif defined(LIBC_COPT_PUBLIC_PACKAGING) +#if defined(LIBC_COPT_PUBLIC_PACKAGING) using ExitCallbackList = cpp::ReverseOrderBlockStore; #else // BlockStore uses dynamic memory allocation. To avoid dynamic memory diff --git a/libc/src/stdlib/gpu/CMakeLists.txt b/libc/src/stdlib/gpu/CMakeLists.txt index 71ae10648d004..f461e83fb4748 100644 --- a/libc/src/stdlib/gpu/CMakeLists.txt +++ b/libc/src/stdlib/gpu/CMakeLists.txt @@ -30,3 +30,16 @@ add_entrypoint_object( libc.include.stdlib libc.src.__support.RPC.rpc_client ) + +add_entrypoint_object( + atexit + SRCS + atexit.cpp + HDRS + ../atexit.h + CXX_STANDARD + 20 # For constinit of the atexit callback list. + DEPENDS + libc.include.stdlib + libc.src.__support.fixedstack +) diff --git a/libc/src/stdlib/gpu/atexit.cpp b/libc/src/stdlib/gpu/atexit.cpp new file mode 100644 index 0000000000000..aa87678adf837 --- /dev/null +++ b/libc/src/stdlib/gpu/atexit.cpp @@ -0,0 +1,63 @@ +//===-- GPU implementation of atexit --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/atexit.h" +#include "src/__support/common.h" +#include "src/__support/fixedstack.h" + +namespace LIBC_NAMESPACE { + +namespace { + +using AtExitCallback = void(void *); +using StdCAtExitCallback = void(void); + +struct AtExitUnit { + AtExitCallback *callback = nullptr; + void *payload = nullptr; + constexpr AtExitUnit() = default; + constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {} +}; + +// The GPU interface cannot use the standard implementation because it does not +// support the Mutex type. Instead we use a lock free stack with a sufficiently +// large size. +constinit FixedStack exit_callbacks; + +void stdc_at_exit_func(void *payload) { + reinterpret_cast(payload)(); +} + +} // namespace + +namespace internal { + +void call_exit_callbacks() { + AtExitUnit unit; + while (exit_callbacks.pop(unit)) + unit.callback(unit.payload); +} + +} // namespace internal + +static int add_atexit_unit(const AtExitUnit &unit) { + if (!exit_callbacks.push(unit)) + return -1; + return 0; +} + +extern "C" int __cxa_atexit(AtExitCallback *callback, void *payload, void *) { + return add_atexit_unit({callback, payload}); +} + +LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) { + return add_atexit_unit( + {&stdc_at_exit_func, reinterpret_cast(callback)}); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt index 3ac104ee8ba94..d453d9aa7a0b9 100644 --- a/libc/startup/gpu/amdgpu/CMakeLists.txt +++ b/libc/startup/gpu/amdgpu/CMakeLists.txt @@ -5,6 +5,7 @@ add_startup_object( DEPENDS libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils + libc.src.__support.CPP.new libc.src.stdlib.exit libc.src.stdlib.atexit COMPILE_OPTIONS diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index 3ac104ee8ba94..d453d9aa7a0b9 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -5,6 +5,7 @@ add_startup_object( DEPENDS libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils + libc.src.__support.CPP.new libc.src.stdlib.exit libc.src.stdlib.atexit COMPILE_OPTIONS