-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[libc++] Allows any types of size 4 and 8 to use native platform ulock_wait (Proof of Concept) #161086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libcxx Author: Hui (huixie90). Changes — Patch is 21.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161086.diff 6 Files Affected:
diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h
index b424427e65c33..489336688b090 100644
--- a/libcxx/include/__atomic/atomic.h
+++ b/libcxx/include/__atomic/atomic.h
@@ -212,6 +212,8 @@ struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> {
// __atomic_base<int, false>. So specializing __atomic_base<_Tp> does not work
template <class _Tp, bool _IsIntegral>
struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
+ using __inner_type _LIBCPP_NODEBUG = _Tp;
+
static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) {
return __a.load(__order);
}
diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h
index 5cc6fb0c55d09..f0fcffc9b0b07 100644
--- a/libcxx/include/__atomic/atomic_flag.h
+++ b/libcxx/include/__atomic/atomic_flag.h
@@ -82,6 +82,8 @@ struct atomic_flag {
template <>
struct __atomic_waitable_traits<atomic_flag> {
+ using __inner_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE;
+
static _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATOMIC_FLAG_TYPE __atomic_load(const atomic_flag& __a, memory_order __order) {
return std::__cxx_atomic_load(&__a.__a_, __order);
}
diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h
index 9bdc6b1160d2c..4da7c208a9268 100644
--- a/libcxx/include/__atomic/atomic_ref.h
+++ b/libcxx/include/__atomic/atomic_ref.h
@@ -230,6 +230,8 @@ struct __atomic_ref_base {
template <class _Tp>
struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> {
+ using __inner_type _LIBCPP_NODEBUG = _Tp;
+
static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_ref_base<_Tp>& __a, memory_order __order) {
return __a.load(__order);
}
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 0dae448d649be..d3574c69d4a46 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -38,6 +38,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// The below implementations look ugly to support C++03
template <class _Tp, class = void>
struct __atomic_waitable_traits {
+ using __inner_type _LIBCPP_NODEBUG = void;
+
template <class _AtomicWaitable>
static void __atomic_load(_AtomicWaitable&&, memory_order) = delete;
@@ -58,21 +60,26 @@ struct __atomic_waitable< _Tp,
#if _LIBCPP_STD_VER >= 20
# if _LIBCPP_HAS_THREADS
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(void const volatile*) _NOEXCEPT;
+template <std::size_t _Size>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(void const volatile*, __cxx_contention_t) _NOEXCEPT;
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) _NOEXCEPT;
+
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+__libcpp_atomic_monitor_global(void const volatile* __address) _NOEXCEPT;
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
+__libcpp_atomic_wait_global_table(void const volatile* __address, __cxx_contention_t __monitor_value) _NOEXCEPT;
+
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
+
+template <std::size_t _Size>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
+__cxx_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
+
+template <std::size_t _Size>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t) _NOEXCEPT;
+__cxx_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
template <class _AtomicWaitable, class _Poll>
struct __atomic_wait_backoff_impl {
@@ -81,38 +88,25 @@ struct __atomic_wait_backoff_impl {
memory_order __order_;
using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
-
- _LIBCPP_AVAILABILITY_SYNC
- _LIBCPP_HIDE_FROM_ABI bool
- __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const {
- // In case the contention type happens to be __cxx_atomic_contention_t, i.e. __cxx_atomic_impl<int64_t>,
- // the platform wait is directly monitoring the atomic value itself.
- // `__poll_` takes the current value of the atomic as an in-out argument
- // to potentially modify it. After it returns, `__monitor` has a value
- // which can be safely waited on by `std::__libcpp_atomic_wait` without any
- // ABA style issues.
- __monitor_val = __waitable_traits::__atomic_load(__a_, __order_);
- return __poll_(__monitor_val);
- }
-
- _LIBCPP_AVAILABILITY_SYNC
- _LIBCPP_HIDE_FROM_ABI bool
- __update_monitor_val_and_poll(void const volatile* __contention_address, __cxx_contention_t& __monitor_val) const {
- // In case the contention type is anything else, platform wait is monitoring a __cxx_atomic_contention_t
- // from the global pool, the monitor comes from __libcpp_atomic_monitor
- __monitor_val = std::__libcpp_atomic_monitor(__contention_address);
- auto __current_val = __waitable_traits::__atomic_load(__a_, __order_);
- return __poll_(__current_val);
- }
+ using __inner_type _LIBCPP_NODEBUG = typename __waitable_traits::__inner_type;
_LIBCPP_AVAILABILITY_SYNC
_LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
if (__elapsed > chrono::microseconds(4)) {
auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
- __cxx_contention_t __monitor_val;
- if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
- return true;
- std::__libcpp_atomic_wait(__contention_address, __monitor_val);
+
+ if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+ auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
+ if (__poll_(__atomic_value))
+ return true;
+ std::__libcpp_atomic_wait_native<sizeof(__inner_type)>(__contention_address, &__atomic_value);
+ } else {
+ __cxx_contention_t __monitor_val = std::__libcpp_atomic_monitor_global(__contention_address);
+ auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
+ if (__poll_(__atomic_value))
+ return true;
+ std::__libcpp_atomic_wait_global_table(__contention_address, __monitor_val);
+ }
} else {
} // poll
return false;
@@ -144,13 +138,23 @@ __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& _
template <class _AtomicWaitable>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
- std::__cxx_atomic_notify_one(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
+ if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+ std::__cxx_atomic_notify_one_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ } else {
+ std::__cxx_atomic_notify_one_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ }
}
template <class _AtomicWaitable>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
- std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
+ if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+ std::__cxx_atomic_notify_all_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ } else {
+ std::__cxx_atomic_notify_all_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ }
}
# else // _LIBCPP_HAS_THREADS
diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index 5b42a0125f875..bf14d076d6281 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -11,6 +11,10 @@
#include <__atomic/support.h>
#include <__config>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <cstddef>
#include <cstdint>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -19,10 +23,23 @@
_LIBCPP_BEGIN_NAMESPACE_STD
+template <class _Tp, class = void>
+struct __is_atomic_wait_native_type : false_type {};
+
#if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
+
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_integral<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
+
#else
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp,
+ __enable_if_t<is_integral<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
+ : true_type {};
+
#endif // __linux__ || (_AIX && !__64BIT__)
using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>;
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index b214ba1fd11c0..c5f03acdf1da1 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -9,6 +9,7 @@
#include <__thread/timed_backoff_policy.h>
#include <atomic>
#include <climits>
+#include <cstddef>
#include <functional>
#include <thread>
@@ -53,6 +54,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#ifdef __linux__
+
+// TODO : update
static void
__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
static constexpr timespec __timeout = {2, 0};
@@ -70,22 +73,32 @@ extern "C" int __ulock_wait(
extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value);
// https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/sys/ulock.h#L82
+# define UL_COMPARE_AND_WAIT 1
# define UL_COMPARE_AND_WAIT64 5
# define ULF_WAKE_ALL 0x00000100
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
- static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waiting on 8 bytes value");
- __ulock_wait(UL_COMPARE_AND_WAIT64, const_cast<__cxx_atomic_contention_t*>(__ptr), __val, 0);
+template <std::size_t _Size>
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const volatile* __val) {
+ static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
+ if constexpr (_Size == 4)
+ __ulock_wait(UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const volatile*>(__val), 0);
+ else
+ __ulock_wait(
+ UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const volatile*>(__val), 0);
}
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
- static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waking up on 8 bytes value");
- __ulock_wake(
- UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<__cxx_atomic_contention_t*>(__ptr), 0);
+template <std::size_t _Size>
+static void __libcpp_platform_wake_by_address(void const volatile* __ptr, bool __notify_one) {
+ static_assert(_Size == 8 || _Size == 4, "Can only wake up on 8 bytes or 4 bytes value");
+
+ if constexpr (_Size == 4)
+ __ulock_wake(UL_COMPARE_AND_WAIT | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
+ else
+ __ulock_wake(UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
}
#elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+// TODO : update
/*
* Since __cxx_contention_t is int64_t even on 32bit FreeBSD
* platforms, we have to use umtx ops that work on the long type, and
@@ -104,6 +117,7 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
#else // <- Add other operating systems here
// Baseline is just a timed backoff
+// TODO : update
static void
__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
@@ -128,83 +142,115 @@ static __libcpp_contention_table_entry __libcpp_contention_table[__libcpp_conten
static hash<void const volatile*> __libcpp_contention_hasher;
-static __libcpp_contention_table_entry* __libcpp_contention_state(void const volatile* p) {
+static __libcpp_contention_table_entry* __get_global_contention_state(void const volatile* p) {
return &__libcpp_contention_table[__libcpp_contention_hasher(p) & (__libcpp_contention_table_size - 1)];
}
/* Given an atomic to track contention and an atomic to actually wait on, which may be
the same atomic, we try to detect contention to avoid spuriously calling the platform. */
-static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __contention_state,
- __cxx_atomic_contention_t const volatile* __platform_state,
+template <std::size_t _Size>
+static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __global_contention_state,
+ void const volatile* __address_to_notify,
bool __notify_one) {
- if (0 != __cxx_atomic_load(__contention_state, memory_order_seq_cst))
+ if (0 != __cxx_atomic_load(__global_contention_state, memory_order_seq_cst))
// We only call 'wake' if we consumed a contention bit here.
- __libcpp_platform_wake_by_address(__platform_state, __notify_one);
-}
-static __cxx_contention_t
-__libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__contention_state*/,
- __cxx_atomic_contention_t const volatile* __platform_state) {
- // We will monitor this value.
- return __cxx_atomic_load(__platform_state, memory_order_acquire);
+ __libcpp_platform_wake_by_address<_Size>(__address_to_notify, __notify_one);
}
+
+template <std::size_t _Size>
static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
- __cxx_atomic_contention_t const volatile* __platform_state,
- __cxx_contention_t __old_value) {
+ void const volatile* __address_to_wait,
+ void const volatile* __old_value) {
__cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
// https://llvm.org/PR109290
// There are no platform guarantees of a memory barrier in the platform wait implementation
__cxx_atomic_thread_fence(memory_order_seq_cst);
// We sleep as long as the monitored value hasn't changed.
- __libcpp_platform_wait_on_address(__platform_state, __old_value);
+ __libcpp_platform_wait_on_address<_Size>(__address_to_wait, __old_value);
__cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
}
/* When the incoming atomic is the wrong size for the platform wait size, need to
launder the value sequence through an atomic from our table. */
-static void __libcpp_atomic_notify(void const volatile* __location) {
- auto const __entry = __libcpp_contention_state(__location);
+static void __atomic_notify_global_table(void const volatile* __location) {
+ auto const __entry = __get_global_contention_state(__location);
// The value sequence laundering happens on the next line below.
__cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
- __libcpp_contention_notify(
+ __libcpp_contention_notify<sizeof(__cxx_atomic_contention_t)>(
&__entry->__contention_state,
&__entry->__platform_state,
false /* when laundering, we can't handle notify_one */);
}
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) noexcept {
- __libcpp_atomic_notify(__location);
+
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor_global(void const volatile* __location) noexcept {
+ auto const __entry = __get_global_contention_state(__location);
+ return __cxx_atomic_load(&__entry->__platform_state, memory_order_acquire);
}
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __location) noexcept {
- __libcpp_atomic_notify(__location);
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_global_table(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
+ auto const __entry = __get_global_contention_state(__location);
+ __libcpp_contention_wait<sizeof(__cxx_atomic_contention_t)>(
+ &__entry->__contention_state, &__entry->__platform_state, &__old_value);
}
-_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) noexcept {
- auto const __entry = __libcpp_contention_state(__location);
- return __libcpp_contention_monitor_for_wait(&__entry->__contention_state, &__entry->__platform_state);
+
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) noexcept {
+ __libcpp_contention_wait<_Size>(
+ &__get_global_contention_state(__address)->__contention_state, __address, __old_value);
}
-_LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
- auto const __entry = __libcpp_contention_state(__location);
- __libcpp_contention_wait(&__entry->__contention_state, &__entry->__platform_state, __old_value);
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile* __location) noexcept {
+ __atomic_notify_global_table(__location);
+}
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile* __location) noexcept {
+ __atomic_notify_global_table(__location);
}
/* When the incoming atomic happens to be the platform wait size, we still need to use the
table for the contention detection, but we can use the atomic directly for the wait. */
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept {
- __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, true);
-}
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
- __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, false);
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native(void const volatile* __location) noexcept {
+ __libcpp_contention_notify<_Size>(&__get_global_contention_state(__location)->__contention_state, __location, true);
}
-// This function is never used, but still exported for ABI compatibility.
-_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
- return __libcpp_contention_monitor_for_wait(&__libcpp_contention...
[truncated]
|
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions cpp,h -- libcxx/include/__atomic/atomic.h libcxx/include/__atomic/atomic_flag.h libcxx/include/__atomic/atomic_ref.h libcxx/include/__atomic/atomic_sync.h libcxx/include/__atomic/contention_t.h libcxx/src/atomic.cpp
View the diff from clang-format here.diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index d3574c69d..f5b5d031a 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -70,16 +70,16 @@ __libcpp_atomic_monitor_global(void const volatile* __address) _NOEXCEPT;
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
__libcpp_atomic_wait_global_table(void const volatile* __address, __cxx_contention_t __monitor_value) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__cxx_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__cxx_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
template <class _AtomicWaitable, class _Poll>
struct __atomic_wait_backoff_impl {
@@ -140,9 +140,11 @@ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _
static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
- std::__cxx_atomic_notify_one_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ std::__cxx_atomic_notify_one_native<sizeof(__inner_type)>(
+ __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
} else {
- std::__cxx_atomic_notify_one_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ std::__cxx_atomic_notify_one_global_table(
+ __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
}
}
@@ -151,9 +153,11 @@ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _
static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
- std::__cxx_atomic_notify_all_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ std::__cxx_atomic_notify_all_native<sizeof(__inner_type)>(
+ __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
} else {
- std::__cxx_atomic_notify_all_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+ std::__cxx_atomic_notify_all_global_table(
+ __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
}
}
diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index a356dc83a..7e5d8574e 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -30,15 +30,16 @@ struct __is_atomic_wait_native_type : false_type {};
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_standard_layout<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_standard_layout<_Tp>::value && sizeof(_Tp) == 4> >
+ : true_type {};
#else
using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp,
- __enable_if_t<is_standard_layout<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
- : true_type {};
+struct __is_atomic_wait_native_type<
+ _Tp,
+ __enable_if_t<is_standard_layout<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> > : true_type {};
#endif // __linux__ || (_AIX && !__64BIT__)
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index c5f03acdf..e233f32f0 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -54,7 +54,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#ifdef __linux__
-
// TODO : update
static void
__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
|
@@ -11,6 +11,10 @@ | |||
|
|||
#include <__atomic/support.h> | |||
#include <__config> | |||
#include <__type_traits/enable_if.h> | |||
#include <__type_traits/integral_constant.h> | |||
#include <__type_traits/is_standard_layout.h> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use of is_standard_layout
looks incorrect to me. I guess we want object types without padding, which is orthogonal to the standard-layout property.
|
||
template <class _Tp> | ||
struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> { | ||
using __inner_type _LIBCPP_NODEBUG = _Tp; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason why we don't simply use ::value_type
on all of these types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
atomic_flag
does not have value_type
using __cxx_contention_t _LIBCPP_NODEBUG = int32_t; | ||
|
||
template <class _Tp> | ||
struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_standard_layout<_Tp>::value && sizeof(_Tp) == 4> > : true_type {}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe what you mean here is just trivially copyable? What properties of the type do you use?
Or do we need has_unique_object_representations
? If an object has padding bytes in it, we probably don't want to consider them for the purpose of atomic-waiting?
static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const volatile* __val) { | ||
static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value"); | ||
if constexpr (_Size == 4) | ||
__ulock_wait(UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const volatile*>(__val), 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the dereference after this reinterpret_cast
is UB since the object is not actually a uint32_t
. Instead, I think we'd need to memcpy
the bytes of that object into a local buffer and then pass that to __ulock_wait
using a bit_cast<uint64_t>
. Under the hood, I wonder how the compiler's going to code-gen that. Will it pass by register?
That should be easy to Godbolt by stubbing the __ulock_wait
signature.
/* When the incoming atomic happens to be the platform wait size, we still need to use the | ||
table for the contention detection, but we can use the atomic directly for the wait. */ | ||
|
||
_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For backwards compatibility, we need to retain the old implementation. You can move them to a new .cpp
file with a comment, or just move them to the end of this file with a comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should introduce this behind an ABI macro. I think the easiest way to do that is probably to constrain the set of types that you perform this optimization for to be just uint64_t
when we're being ABI compatible.
Also, we're introducing new symbols to the dylib. That means we need to handle back deployment. You'll need to add an entry to __configuration/availability.h
(we have examples there), and only call your new symbols in the dylib when the deployment target supports that. It's not difficult, but it will be a bit of a challenge to refactor things so that you can still call the old symbols.
This is to address #146145
This PR is not in a mergeable state, as it breaks ABI and needs some ABI macros.
The issue before was that, for std::atomic::wait/notify, we only support uint64_t going through the native ulock_wait directly. Any other type will go through the global contention table's atomic, increasing the chances of spurious wakeup. This PR tries to allow any type that is of size 4 or 8 to go directly to the ulock_wait.
This PR is just a proof of concept. If we like this idea, I can go further to update the Linux/FreeBSD branches and add ABI macros so the existing behaviours are preserved under the stable ABI.
Here are some benchmark results