@@ -16,126 +16,205 @@ namespace __ESIMD_DNS {
// These functions implement atomic updates of pre-existing variables in the
// absence of C++20's atomic_ref.

19- template <typename Ty> Ty atomic_load (Ty *ptr) {
19+ // __atomic_* functions support only integral types. In order to
20+ // support floating types for certain operations like min/max,
21+ // 'cmpxchg' operation is applied for result values using
22+ // 'bridging' variables in integral type.
23+ template <typename Ty> using CmpxchgTy = __ESIMD_DNS::uint_type_t <sizeof (Ty)>;
24+
25+ template <typename Ty> inline Ty atomic_load (Ty *ptr) {
2026#ifdef _WIN32
2127 // TODO: Windows will be supported soon
2228 __ESIMD_UNSUPPORTED_ON_HOST;
2329#else
24- return __atomic_load (ptr, __ATOMIC_SEQ_CST);
30+ __ESIMD_UNSUPPORTED_ON_HOST;
31+ // TODO : Enable with unit test
32+ /* return sycl::bit_cast<Ty>(__atomic_load_n((CmpxchgTy<Ty> *)ptr,
33+ __ATOMIC_SEQ_CST)); */
2534#endif
2635}
2736
/// Atomically writes `val` to `*ptr` and returns the value `*ptr` held
/// immediately before the write.
///
/// The read of the old value and the write of the new one happen as a single
/// sequentially-consistent exchange, so no other thread's update can slip in
/// between them. The generic `__atomic_exchange` builtin copies the object
/// representation, so floating-point values are stored bit-exactly rather
/// than being converted to an integer.
///
/// \param ptr address of the variable to overwrite.
/// \param val value to store.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_store(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty previous;
  __atomic_exchange(ptr, &val, &previous, __ATOMIC_SEQ_CST);
  return previous;
#endif
}

/// Atomically performs `*ptr += val` and returns the value `*ptr` held before
/// the addition (fetch-then-add semantics).
///
/// Integral types map directly onto `__atomic_fetch_add`. Other types (the
/// floating-point case) use a compare-exchange retry loop built on the generic
/// `__atomic_load` / `__atomic_compare_exchange` builtins, which compare and
/// copy object representations and therefore need no integer type punning.
///
/// \param ptr address of the variable to update.
/// \param val addend.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_add(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  if constexpr (std::is_integral_v<Ty>) {
    return __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST);
  } else {
    // For Floating type
    Ty observed;
    __atomic_load(ptr, &observed, __ATOMIC_SEQ_CST);
    Ty updated;
    do {
      // On a failed exchange `observed` is refreshed with the current
      // contents of *ptr, so the sum is recomputed from up-to-date data.
      updated = observed + val;
    } while (!__atomic_compare_exchange(ptr, &observed, &updated, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
    return observed;
  }
#endif
}

/// Atomically performs `*ptr -= val` and returns the value `*ptr` held before
/// the subtraction (fetch-then-sub semantics).
///
/// Integral types map directly onto `__atomic_fetch_sub`. Other types (the
/// floating-point case) use a compare-exchange retry loop built on the generic
/// `__atomic_load` / `__atomic_compare_exchange` builtins, which compare and
/// copy object representations and therefore need no integer type punning.
///
/// \param ptr address of the variable to update.
/// \param val subtrahend.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_sub(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  if constexpr (std::is_integral_v<Ty>) {
    return __atomic_fetch_sub(ptr, val, __ATOMIC_SEQ_CST);
  } else {
    // For Floating type
    Ty observed;
    __atomic_load(ptr, &observed, __ATOMIC_SEQ_CST);
    Ty updated;
    do {
      // On a failed exchange `observed` is refreshed with the current
      // contents of *ptr, so the difference is recomputed from fresh data.
      updated = observed - val;
    } while (!__atomic_compare_exchange(ptr, &observed, &updated, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
    return observed;
  }
#endif
}

/// Atomically performs `*ptr &= val` and returns the value `*ptr` held before
/// the update (fetch-then-and semantics).
///
/// On non-Windows hosts only integral `Ty` is accepted; this is checked at
/// compile time.
///
/// \param ptr address of the variable to update.
/// \param val mask to AND into `*ptr`.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_and(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_and requires an integral type");
  return __atomic_fetch_and(ptr, val, __ATOMIC_SEQ_CST);
#endif
}

/// Atomically performs `*ptr |= val` and returns the value `*ptr` held before
/// the update (fetch-then-or semantics).
///
/// On non-Windows hosts only integral `Ty` is accepted; this is checked at
/// compile time.
///
/// \param ptr address of the variable to update.
/// \param val bits to OR into `*ptr`.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_or(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_or requires an integral type");
  return __atomic_fetch_or(ptr, val, __ATOMIC_SEQ_CST);
#endif
}

/// Atomically performs `*ptr ^= val` and returns the value `*ptr` held before
/// the update (fetch-then-xor semantics).
///
/// On non-Windows hosts only integral `Ty` is accepted; this is checked at
/// compile time.
///
/// \param ptr address of the variable to update.
/// \param val bits to XOR into `*ptr`.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_xor(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  static_assert(std::is_integral_v<Ty>,
                "atomic_xor requires an integral type");
  return __atomic_fetch_xor(ptr, val, __ATOMIC_SEQ_CST);
#endif
}

/// Atomically replaces `*ptr` with `std::min(*ptr, val)` and returns the value
/// `*ptr` held before the update.
///
/// A single compare-exchange retry loop serves both integral and
/// floating-point types: the generic `__atomic_load` /
/// `__atomic_compare_exchange` builtins operate on the object representation,
/// so the initial read is atomic and no integer type punning is needed.
///
/// \param ptr address of the variable to update.
/// \param val candidate minimum.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_min(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty observed;
  __atomic_load(ptr, &observed, __ATOMIC_SEQ_CST);
  Ty updated;
  do {
    // On a failed exchange `observed` is refreshed with the current contents
    // of *ptr, so the minimum is recomputed against up-to-date data.
    updated = std::min<Ty>(observed, val);
  } while (!__atomic_compare_exchange(ptr, &observed, &updated, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  return observed;
#endif
}

/// Atomically replaces `*ptr` with `std::max(*ptr, val)` and returns the value
/// `*ptr` held before the update.
///
/// A single compare-exchange retry loop serves both integral and
/// floating-point types: the generic `__atomic_load` /
/// `__atomic_compare_exchange` builtins operate on the object representation,
/// so the initial read is atomic and no integer type punning is needed.
///
/// \param ptr address of the variable to update.
/// \param val candidate maximum.
/// \return the previous contents of `*ptr`.
template <typename Ty> inline Ty atomic_max(Ty *ptr, Ty val) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty observed;
  __atomic_load(ptr, &observed, __ATOMIC_SEQ_CST);
  Ty updated;
  do {
    // On a failed exchange `observed` is refreshed with the current contents
    // of *ptr, so the maximum is recomputed against up-to-date data.
    updated = std::max<Ty>(observed, val);
  } while (!__atomic_compare_exchange(ptr, &observed, &updated, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
  return observed;
#endif
}

/// Atomic compare-and-exchange: if `*ptr` equals `expected`, stores `desired`
/// into `*ptr`. In either case returns the value `*ptr` held before the call.
///
/// The comparison is made on the object representation (the generic
/// `__atomic_compare_exchange` builtin), so for floating-point types two
/// values match only when their bit patterns are identical.
///
/// \param ptr address of the variable to update.
/// \param expected value `*ptr` must hold for the exchange to take place.
/// \param desired value written to `*ptr` when the comparison succeeds.
/// \return the previous contents of `*ptr`.
template <typename Ty>
inline Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  Ty observed = expected;
  __atomic_compare_exchange(ptr, &observed, &desired, false, __ATOMIC_SEQ_CST,
                            __ATOMIC_SEQ_CST);
  // If the exchange occurred, `observed` still equals `expected`, which is
  // what *ptr held. If it did not occur, the builtin has overwritten
  // `observed` with the current contents of *ptr. Either way `observed` is
  // the old value.
  return observed;
#endif
}

/// Issues a sequentially-consistent thread fence on non-Windows hosts.
/// On Windows the operation is not implemented yet and is routed to
/// __ESIMD_UNSUPPORTED_ON_HOST.
inline void atomic_fence() {
#ifdef _WIN32
  // TODO: Windows will be supported soon
  __ESIMD_UNSUPPORTED_ON_HOST;
#else
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
#endif
}

139218} // namespace __ESIMD_DNS
140219
/// @endcond ESIMD_DETAIL
0 commit comments