Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit 0f822ff

Browse files
committed
Merge internal changes from CTK 11.7 to libcudacxx.
1 parent 05d48aa commit 0f822ff

File tree

3 files changed: +20 additions, −19 deletions

include/cuda/std/detail/__config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
#endif
8989
#define _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP
9090
#define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(size, ptr) (size <= 8)
91+
#define _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS
9192

9293
#define _LIBCUDACXX_SYS_CLOCK_DURATION nanoseconds
9394

include/cuda/std/detail/libcxx/include/semaphore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class __atomic_semaphore_base
105105
_LIBCUDACXX_INLINE_VISIBILITY
106106
bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
107107
{
108-
return __libcpp_thread_poll_with_backoff([=]() {
108+
return __libcpp_thread_poll_with_backoff([this]() {
109109
ptrdiff_t const __old = __count.load(memory_order_acquire);
110110
return __old != 0 && __fetch_sub_if_slow(__old);
111111
}, __rel_time);
@@ -180,7 +180,7 @@ class __atomic_semaphore_base<_Sco, 1> {
180180
_LIBCUDACXX_INLINE_VISIBILITY
181181
bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
182182
{
183-
return __libcpp_thread_poll_with_backoff([=]() {
183+
return __libcpp_thread_poll_with_backoff([this]() {
184184
return try_acquire();
185185
}, __rel_time);
186186
}

include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ inline
7272
void __cxx_atomic_thread_fence(memory_order __order) {
7373
NV_DISPATCH_TARGET(
7474
NV_IS_DEVICE, (
75-
__atomic_thread_fence_cuda(__order, __thread_scope_system_tag());
75+
__atomic_thread_fence_cuda(static_cast<__memory_order_underlying_t>(__order), __thread_scope_system_tag());
7676
),
7777
NV_IS_HOST, (
7878
__host::__cxx_atomic_thread_fence(__order);
@@ -85,7 +85,7 @@ inline
8585
void __cxx_atomic_signal_fence(memory_order __order) {
8686
NV_DISPATCH_TARGET(
8787
NV_IS_DEVICE, (
88-
__atomic_signal_fence_cuda(__order);
88+
__atomic_signal_fence_cuda(static_cast<__memory_order_underlying_t>(__order));
8989
),
9090
NV_IS_HOST, (
9191
__host::__cxx_atomic_signal_fence(__order);
@@ -181,7 +181,7 @@ __host__ __device__
181181
alignas(_Tp) auto __tmp = __val;
182182
NV_DISPATCH_TARGET(
183183
NV_IS_DEVICE, (
184-
__atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
184+
__atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
185185
),
186186
NV_IS_HOST, (
187187
__host::__cxx_atomic_store(&__a->__a_value, __tmp, __order);
@@ -194,7 +194,7 @@ __host__ __device__
194194
_Tp __cxx_atomic_load(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a, memory_order __order) {
195195
NV_DISPATCH_TARGET(
196196
NV_IS_DEVICE, (
197-
return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), __order, __scope_tag<_Sco>());
197+
return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
198198
),
199199
NV_IS_HOST, (
200200
return __host::__cxx_atomic_load(&__a->__a_value, __order);
@@ -208,7 +208,7 @@ __host__ __device__
208208
alignas(_Tp) auto __tmp = __val;
209209
NV_DISPATCH_TARGET(
210210
NV_IS_DEVICE, (
211-
return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
211+
return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
212212
),
213213
NV_IS_HOST, (
214214
return __host::__cxx_atomic_exchange(&__a->__a_value, __tmp, __order);
@@ -224,7 +224,7 @@ __host__ __device__
224224
NV_DISPATCH_TARGET(
225225
NV_IS_DEVICE, (
226226
alignas(_Tp) auto __tmp_v = __val;
227-
__result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, __success, __failure, __scope_tag<_Sco>());
227+
__result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
228228
),
229229
NV_IS_HOST, (
230230
__result = __host::__cxx_atomic_compare_exchange_strong(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -242,7 +242,7 @@ __host__ __device__
242242
NV_DISPATCH_TARGET(
243243
NV_IS_DEVICE, (
244244
alignas(_Tp) auto __tmp_v = __val;
245-
__result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, __success, __failure, __scope_tag<_Sco>());
245+
__result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
246246
),
247247
NV_IS_HOST, (
248248
__result = __host::__cxx_atomic_compare_exchange_weak(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -257,7 +257,7 @@ __host__ __device__
257257
_Tp __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
258258
NV_DISPATCH_TARGET(
259259
NV_IS_DEVICE, (
260-
return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
260+
return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
261261
),
262262
NV_IS_HOST, (
263263
return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -270,7 +270,7 @@ __host__ __device__
270270
_Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
271271
NV_DISPATCH_TARGET(
272272
NV_IS_DEVICE, (
273-
return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
273+
return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
274274
),
275275
NV_IS_HOST, (
276276
return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -283,7 +283,7 @@ __host__ __device__
283283
_Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
284284
NV_DISPATCH_TARGET(
285285
NV_IS_DEVICE, (
286-
return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
286+
return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
287287
),
288288
NV_IS_HOST, (
289289
return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -296,7 +296,7 @@ __host__ __device__
296296
_Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
297297
NV_DISPATCH_TARGET(
298298
NV_IS_DEVICE, (
299-
return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
299+
return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
300300
),
301301
NV_IS_HOST, (
302302
return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -309,7 +309,7 @@ __host__ __device__
309309
_Tp __cxx_atomic_fetch_and(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
310310
NV_DISPATCH_TARGET(
311311
NV_IS_DEVICE, (
312-
return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
312+
return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
313313
),
314314
NV_IS_HOST, (
315315
return __host::__cxx_atomic_fetch_and(&__a->__a_value, __pattern, __order);
@@ -322,7 +322,7 @@ __host__ __device__
322322
_Tp __cxx_atomic_fetch_or(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
323323
NV_DISPATCH_TARGET(
324324
NV_IS_DEVICE, (
325-
return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
325+
return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
326326
),
327327
NV_IS_HOST, (
328328
return __host::__cxx_atomic_fetch_or(&__a->__a_value, __pattern, __order);
@@ -335,7 +335,7 @@ __host__ __device__
335335
_Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
336336
NV_DISPATCH_TARGET(
337337
NV_IS_DEVICE, (
338-
return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
338+
return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
339339
),
340340
NV_IS_HOST, (
341341
return __host::__cxx_atomic_fetch_xor(&__a->__a_value, __pattern, __order);
@@ -348,7 +348,7 @@ __host__ __device__
348348
_Tp __cxx_atomic_fetch_max(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
349349
NV_IF_TARGET(
350350
NV_IS_DEVICE, (
351-
return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
351+
return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
352352
), (
353353
return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);
354354
)
@@ -360,7 +360,7 @@ __host__ __device__
360360
_Tp __cxx_atomic_fetch_min(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
361361
NV_IF_TARGET(
362362
NV_IS_DEVICE, (
363-
return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
363+
return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
364364
), (
365365
return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);
366366
)
@@ -428,7 +428,7 @@ __host__ __device__ inline bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_
428428
auto const __actual = __cxx_small_from_32<_Tp>(__temp);
429429
if(!__ret) {
430430
if(0 == __cuda_memcmp(&__actual, __expected, sizeof(_Tp)))
431-
__cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order::memory_order_relaxed);
431+
__cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order_relaxed);
432432
else
433433
*__expected = __actual;
434434
}

0 commit comments

Comments (0)