|
13 | 13 | #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
|
14 | 14 |
|
15 | 15 | #include "DeviceTypes.h"
|
| 16 | +#include "DeviceUtils.h" |
16 | 17 |
|
17 |
| -namespace ompx { |
| 18 | +#pragma omp begin declare target device_type(nohost) |
18 | 19 |
|
| 20 | +namespace ompx { |
19 | 21 | namespace atomic {
|
20 | 22 |
|
21 | 23 | enum OrderingTy {
|
@@ -44,60 +46,134 @@ enum MemScopeTy {
|
44 | 46 | uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
|
45 | 47 | MemScopeTy MemScope = MemScopeTy::all);
|
46 | 48 |
|
47 |
| -template <typename Ty> Ty add(Ty *Address, Ty Val, atomic::OrderingTy Ordering); |
48 |
| - |
49 |
| -template <typename Ty> Ty add_system(Ty *Address, Ty Val, atomic::OrderingTy Ordering); |
50 |
| - |
51 | 49 | /// Atomically perform <op> on \p V and \p *Addr with \p Ordering semantics. The
|
52 | 50 | /// result is stored in \p *Addr;
|
53 | 51 | /// {
|
54 | 52 |
|
55 |
| -#define ATOMIC_COMMON_OP(TY) \ |
56 |
| - TY add(TY *Addr, TY V, OrderingTy Ordering); \ |
57 |
| - TY add_system(TY *Addr, TY V, OrderingTy Ordering); \ |
58 |
| - TY mul(TY *Addr, TY V, OrderingTy Ordering); \ |
59 |
| - TY load(TY *Addr, OrderingTy Ordering); \ |
60 |
| - void store(TY *Addr, TY V, OrderingTy Ordering); \ |
61 |
| - bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc, \ |
62 |
| - OrderingTy OrderingFail); |
63 |
| - |
64 |
| -#define ATOMIC_FP_ONLY_OP(TY) \ |
65 |
| - TY min(TY *Addr, TY V, OrderingTy Ordering); \ |
66 |
| - TY max(TY *Addr, TY V, OrderingTy Ordering); |
67 |
| - |
68 |
| -#define ATOMIC_INT_ONLY_OP(TY) \ |
69 |
| - TY min(TY *Addr, TY V, OrderingTy Ordering); \ |
70 |
| - TY max(TY *Addr, TY V, OrderingTy Ordering); \ |
71 |
| - TY bit_or(TY *Addr, TY V, OrderingTy Ordering); \ |
72 |
| - TY bit_and(TY *Addr, TY V, OrderingTy Ordering); \ |
73 |
| - TY bit_xor(TY *Addr, TY V, OrderingTy Ordering); |
74 |
| - |
75 |
| -#define ATOMIC_FP_OP(TY) \ |
76 |
| - ATOMIC_FP_ONLY_OP(TY) \ |
77 |
| - ATOMIC_COMMON_OP(TY) |
78 |
| - |
79 |
| -#define ATOMIC_INT_OP(TY) \ |
80 |
| - ATOMIC_INT_ONLY_OP(TY) \ |
81 |
| - ATOMIC_COMMON_OP(TY) |
82 |
| - |
83 |
| -// This needs to be kept in sync with the header. Also the reason we don't use |
84 |
| -// templates here. |
85 |
| -ATOMIC_INT_OP(int8_t) |
86 |
| -ATOMIC_INT_OP(int16_t) |
87 |
| -ATOMIC_INT_OP(int32_t) |
88 |
| -ATOMIC_INT_OP(int64_t) |
89 |
| -ATOMIC_INT_OP(uint8_t) |
90 |
| -ATOMIC_INT_OP(uint16_t) |
91 |
| -ATOMIC_INT_OP(uint32_t) |
92 |
| -ATOMIC_INT_OP(uint64_t) |
93 |
| -ATOMIC_FP_OP(float) |
94 |
| -ATOMIC_FP_OP(double) |
95 |
| - |
96 |
| -#undef ATOMIC_INT_ONLY_OP |
97 |
| -#undef ATOMIC_FP_ONLY_OP |
98 |
| -#undef ATOMIC_COMMON_OP |
99 |
| -#undef ATOMIC_INT_OP |
100 |
| -#undef ATOMIC_FP_OP |
| 53 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 54 | +bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc, |
| 55 | + atomic::OrderingTy OrderingFail, |
| 56 | + atomic::ScopeTy Scope = ScopeTy::device_) { |
| 57 | + return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false, |
| 58 | + OrderingSucc, OrderingFail, Scope); |
| 59 | +} |
| 60 | + |
| 61 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 62 | +V add(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 63 | + return __scoped_atomic_fetch_add(Address, Val, Ordering, |
| 64 | + __MEMORY_SCOPE_DEVICE); |
| 65 | +} |
| 66 | + |
| 67 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 68 | +V add_system(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 69 | + return __scoped_atomic_fetch_add(Address, Val, Ordering, |
| 70 | + __MEMORY_SCOPE_SYSTEM); |
| 71 | +} |
| 72 | + |
| 73 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 74 | +V load(Ty *Address, atomic::OrderingTy Ordering) { |
| 75 | + return add(Address, Ty(0), Ordering); |
| 76 | +} |
| 77 | + |
| 78 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 79 | +void store(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 80 | + __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE); |
| 81 | +} |
| 82 | + |
| 83 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 84 | +V mul(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 85 | + Ty TypedCurrentVal, TypedResultVal, TypedNewVal; |
| 86 | + bool Success; |
| 87 | + do { |
| 88 | + TypedCurrentVal = atomic::load(Address, Ordering); |
| 89 | + TypedNewVal = TypedCurrentVal * Val; |
| 90 | + Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering, |
| 91 | + atomic::relaxed); |
| 92 | + } while (!Success); |
| 93 | + return TypedResultVal; |
| 94 | +} |
| 95 | + |
| 96 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 97 | +utils::enable_if_t<!utils::is_floating_point_v<V>, V> |
| 98 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 99 | + return __scoped_atomic_fetch_max(Address, Val, Ordering, |
| 100 | + __MEMORY_SCOPE_DEVICE); |
| 101 | +} |
| 102 | + |
| 103 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 104 | +utils::enable_if_t<utils::is_same_v<V, float>, V> |
| 105 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 106 | + if (Val >= 0) |
| 107 | + return utils::bitCast<float>( |
| 108 | + max((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering)); |
| 109 | + return utils::bitCast<float>( |
| 110 | + min((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering)); |
| 111 | +} |
| 112 | + |
| 113 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 114 | +utils::enable_if_t<utils::is_same_v<V, double>, V> |
| 115 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 116 | + if (Val >= 0) |
| 117 | + return utils::bitCast<double>( |
| 118 | + max((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering)); |
| 119 | + return utils::bitCast<double>( |
| 120 | + min((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering)); |
| 121 | +} |
| 122 | + |
| 123 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 124 | +utils::enable_if_t<!utils::is_floating_point_v<V>, V> |
| 125 | +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 126 | + return __scoped_atomic_fetch_min(Address, Val, Ordering, |
| 127 | + __MEMORY_SCOPE_DEVICE); |
| 128 | +} |
| 129 | + |
| 130 | +// TODO: Implement this with __atomic_fetch_max and remove the duplication. |
| 131 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 132 | +utils::enable_if_t<utils::is_same_v<V, float>, V> |
| 133 | +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 134 | + if (Val >= 0) |
| 135 | + return utils::bitCast<float>( |
| 136 | + min((int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering)); |
| 137 | + return utils::bitCast<float>( |
| 138 | + max((uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering)); |
| 139 | +} |
| 140 | + |
| 141 | +// TODO: Implement this with __atomic_fetch_max and remove the duplication. |
| 142 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 143 | +utils::enable_if_t<utils::is_same_v<V, double>, V> |
| 144 | +min(Ty *Address, utils::remove_addrspace_t<Ty> Val, |
| 145 | + atomic::OrderingTy Ordering) { |
| 146 | + if (Val >= 0) |
| 147 | + return utils::bitCast<double>( |
| 148 | + min((int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering)); |
| 149 | + return utils::bitCast<double>( |
| 150 | + max((uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering)); |
| 151 | +} |
| 152 | + |
| 153 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 154 | +V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 155 | + return __scoped_atomic_fetch_or(Address, Val, Ordering, |
| 156 | + __MEMORY_SCOPE_DEVICE); |
| 157 | +} |
| 158 | + |
| 159 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 160 | +V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 161 | + return __scoped_atomic_fetch_and(Address, Val, Ordering, |
| 162 | + __MEMORY_SCOPE_DEVICE); |
| 163 | +} |
| 164 | + |
| 165 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 166 | +V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 167 | + return __scoped_atomic_fetch_xor(Address, Val, Ordering, |
| 168 | + __MEMORY_SCOPE_DEVICE); |
| 169 | +} |
| 170 | + |
| 171 | +static inline uint32_t atomicExchange(uint32_t *Address, uint32_t Val, |
| 172 | + atomic::OrderingTy Ordering) { |
| 173 | + uint32_t R; |
| 174 | + __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE); |
| 175 | + return R; |
| 176 | +} |
101 | 177 |
|
102 | 178 | ///}
|
103 | 179 |
|
@@ -150,4 +226,6 @@ void system(atomic::OrderingTy Ordering);
|
150 | 226 |
|
151 | 227 | } // namespace ompx
|
152 | 228 |
|
| 229 | +#pragma omp end declare target |
| 230 | + |
153 | 231 | #endif
|
0 commit comments