@@ -41,6 +41,29 @@ inline T1 atomic_fetch_add(T1 *addr, T2 operand) {
41
41
return atm.fetch_add (operand);
42
42
}
43
43
44
+ template <sycl::access::address_space addressSpace =
45
+ sycl::access::address_space::global_space,
46
+ sycl::memory_order memoryOrder = sycl::memory_order::relaxed,
47
+ sycl::memory_scope memoryScope = sycl::memory_scope::device>
48
+ inline sycl::half2 atomic_fetch_add (sycl::half2 *addr, sycl::half2 operand) {
49
+ auto atm = sycl::atomic_ref<unsigned , memoryOrder, memoryScope, addressSpace>(
50
+ *reinterpret_cast <unsigned *>(addr));
51
+
52
+ union {
53
+ unsigned i;
54
+ sycl::half2 h;
55
+ } old{0 }, output{0 };
56
+
57
+ while (true ) {
58
+ old.i = atm.load ();
59
+ output.h = old.h + operand;
60
+ if (atm.compare_exchange_strong (old.i , output.i ))
61
+ break ;
62
+ }
63
+
64
+ return output.h ;
65
+ }
66
+
44
67
/// Atomically add the value operand to the value at the addr and assign the
45
68
/// result to the value at addr.
46
69
/// \param [in, out] addr The pointer to the data.
@@ -695,10 +718,14 @@ template <typename T> struct IsValidAtomicType {
695
718
696
719
template <typename T,
697
720
sycl::memory_scope DefaultScope = sycl::memory_scope::system,
721
+ #ifdef __AMDGPU__
722
+ sycl::memory_order DefaultOrder = sycl::memory_order::acq_rel,
723
+ #else
698
724
sycl::memory_order DefaultOrder = sycl::memory_order::seq_cst,
725
+ #endif
699
726
sycl::access::address_space Space =
700
727
sycl::access::address_space::generic_space>
701
- class atomic {
728
+ class atomic {
702
729
static_assert (
703
730
detail::IsValidAtomicType<T>::value,
704
731
" Invalid atomic type. Valid types are int, unsigned int, long, "
0 commit comments