@@ -209,12 +209,6 @@ class __barrier_base {
209209 _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_base<ptrdiff_t , _Sco> __expected, __arrived;
210210 _LIBCUDACXX_BARRIER_ALIGNMENTS _CompletionF __completion;
211211 _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_base<bool , _Sco> __phase;
212-
213- _LIBCUDACXX_INLINE_VISIBILITY
214- bool __try_wait_phase (bool __old_phase) const
215- {
216- return __phase.load (memory_order_acquire) != __old_phase;
217- }
218212public:
219213 using arrival_token = bool ;
220214
@@ -247,15 +241,11 @@ public:
247241 return __old_phase;
248242 }
249243 _LIBCUDACXX_INLINE_VISIBILITY
250- bool try_wait (arrival_token __old) const
251- {
252- return __try_wait_phase (__old);
253- }
254- _LIBCUDACXX_INLINE_VISIBILITY
255- bool __try_wait_parity (bool __parity) const
244+ bool __try_wait (arrival_token __old_phase) const
256245 {
257- return __try_wait_phase (__parity) ;
246+ return __phase != __old_phase ;
258247 }
248+
259249 _LIBCUDACXX_INLINE_VISIBILITY
260250 void wait (arrival_token&& __old_phase) const
261251 {
@@ -280,42 +270,6 @@ public:
280270 }
281271};
282272
283- template <class __Barrier >
284- struct __barrier_poll_tester {
285- __Barrier const * __this;
286- typename __Barrier::arrival_token __phase;
287-
288- _LIBCUDACXX_INLINE_VISIBILITY
289- __barrier_poll_tester (__Barrier const * __this_, typename __Barrier::arrival_token&& __phase_)
290- : __this(__this_)
291- , __phase(_CUDA_VSTD::move(__phase_))
292- {}
293-
294- _LIBCUDACXX_INLINE_VISIBILITY
295- bool operator ()() const
296- {
297- return __this->try_wait (__phase);
298- }
299- };
300-
301- template <class __Barrier >
302- struct __barrier_poll_tester_parity {
303- __Barrier const * __this;
304- bool __parity;
305-
306- _LIBCUDACXX_INLINE_VISIBILITY
307- __barrier_poll_tester_parity (__Barrier const * __this_, bool __parity_)
308- : __this(__this_)
309- , __parity(__parity_)
310- {}
311-
312- inline _LIBCUDACXX_INLINE_VISIBILITY
313- bool operator ()() const
314- {
315- return __this->try_wait_parity (__parity);
316- }
317- };
318-
319273template <int _Sco>
320274class __barrier_base <__empty_completion, _Sco> {
321275
@@ -331,23 +285,29 @@ public:
331285 using arrival_token = uint64_t ;
332286
333287private:
334- static _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
288+ struct __poll_tester {
289+ __barrier_base const * __this;
290+ arrival_token __phase;
291+
292+ _LIBCUDACXX_INLINE_VISIBILITY
293+ __poll_tester (__barrier_base const * __this_, arrival_token&& __phase_)
294+ : __this(__this_)
295+ , __phase(_CUDA_VSTD::move(__phase_))
296+ {}
297+
298+ inline _LIBCUDACXX_INLINE_VISIBILITY
299+ bool operator ()() const
300+ {
301+ return __this->__try_wait (__phase);
302+ }
303+ };
304+
305+ static inline _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_CONSTEXPR
335306 uint64_t __init (ptrdiff_t __count) _NOEXCEPT
336307 {
337308 return (((1u << 31 ) - __count) << 32 )
338309 | ((1u << 31 ) - __count);
339310 }
340- _LIBCUDACXX_INLINE_VISIBILITY
341- bool __try_wait_phase (uint64_t __phase) const
342- {
343- uint64_t const __current = __phase_arrived_expected.load (memory_order_acquire);
344- return ((__current & __phase_bit) != __phase);
345- }
346- _LIBCUDACXX_INLINE_VISIBILITY
347- bool __try_wait_parity (bool __parity) const
348- {
349- return __try_wait_phase (__parity ? __phase_bit : 0 );
350- }
351311
352312public:
353313 __barrier_base () = default ;
@@ -363,20 +323,10 @@ public:
363323 __barrier_base& operator =(__barrier_base const &) = delete ;
364324
365325 _LIBCUDACXX_INLINE_VISIBILITY
366- bool __try_wait (arrival_token __old) const
367- {
368- return __try_wait_phase (__old & __phase_bit);
369- }
370-
371- _LIBCUDACXX_INLINE_VISIBILITY
372- bool try_wait_parity (bool __parity) const
373- {
374- return __try_wait_parity (__parity);
375- }
376- _LIBCUDACXX_INLINE_VISIBILITY
377- bool try_wait (arrival_token __old) const
326+ bool __try_wait (arrival_token __phase) const
378327 {
379- return __try_wait (__old);
328+ uint64_t const __current = __phase_arrived_expected.load (memory_order_acquire);
329+ return ((__current & __phase_bit) != __phase);
380330 }
381331
382332 _LIBCUDACXX_NODISCARD_ATTRIBUTE inline _LIBCUDACXX_INLINE_VISIBILITY
@@ -390,22 +340,17 @@ public:
390340 }
391341 return __old & __phase_bit;
392342 }
393- _LIBCUDACXX_INLINE_VISIBILITY
343+ inline _LIBCUDACXX_INLINE_VISIBILITY
394344 void wait (arrival_token&& __phase) const
395345 {
396- __libcpp_thread_poll_with_backoff (__barrier_poll_tester<__barrier_base> (this , _CUDA_VSTD::move (__phase)));
346+ __libcpp_thread_poll_with_backoff (__poll_tester (this , _CUDA_VSTD::move (__phase)));
397347 }
398- _LIBCUDACXX_INLINE_VISIBILITY
399- void wait_parity (bool __parity) const
400- {
401- __libcpp_thread_poll_with_backoff (__barrier_poll_tester_parity<__barrier_base>(this , __parity));
402- }
403- _LIBCUDACXX_INLINE_VISIBILITY
348+ inline _LIBCUDACXX_INLINE_VISIBILITY
404349 void arrive_and_wait ()
405350 {
406351 wait (arrive ());
407352 }
408- _LIBCUDACXX_INLINE_VISIBILITY
353+ inline _LIBCUDACXX_INLINE_VISIBILITY
409354 void arrive_and_drop ()
410355 {
411356 __phase_arrived_expected.fetch_add (__expected_unit, memory_order_relaxed);
0 commit comments