Skip to content
This repository was archived by the owner on Mar 21, 2024. It is now read-only.

Commit bc80e36

Browse files
committed
Extend the test to measure both phases, make barrier_(try_)wait_parity member functions
1 parent 1464783 commit bc80e36

File tree

3 files changed

+52
-39
lines changed

3 files changed

+52
-39
lines changed

.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct barrier_arrive_and_wait
4949
};
5050

5151
template <bool Phase>
52-
struct barrier_arrive_parity_wait
52+
struct barrier_parity_wait
5353
{
5454
using async = cuda::std::true_type;
5555

@@ -59,7 +59,7 @@ struct barrier_arrive_parity_wait
5959
{
6060
data.parity_waiting.store(true, cuda::std::memory_order_release);
6161
data.parity_waiting.notify_all();
62-
cuda::barrier_wait_parity(&data.barrier, Phase);
62+
data.barrier.wait_parity(Phase);
6363
}
6464
};
6565

@@ -74,9 +74,14 @@ struct clear_token
7474
};
7575

7676
using aw_aw_pw = performer_list<
77+
barrier_parity_wait<false>,
78+
barrier_arrive_and_wait,
79+
barrier_arrive_and_wait,
80+
async_tester_fence,
81+
clear_token,
82+
barrier_parity_wait<true>,
7783
barrier_arrive_and_wait,
7884
barrier_arrive_and_wait,
79-
barrier_arrive_parity_wait<false>,
8085
async_tester_fence,
8186
clear_token
8287
>;

include/cuda/std/barrier

Lines changed: 13 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -75,38 +75,6 @@ _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE
7575

7676
_LIBCUDACXX_BEGIN_NAMESPACE_CUDA
7777

78-
template<class __Barrier>
79-
inline _LIBCUDACXX_INLINE_VISIBILITY
80-
bool barrier_try_wait_parity(__Barrier const* __this, bool __parity)
81-
{
82-
return __this->__try_wait_parity(__parity);
83-
}
84-
85-
template<class __Barrier>
86-
struct __barrier_poll_tester_parity {
87-
__Barrier const* __this;
88-
bool __parity;
89-
90-
_LIBCUDACXX_INLINE_VISIBILITY
91-
__barrier_poll_tester_parity(__Barrier const* __this_, bool __parity_)
92-
: __this(__this_)
93-
, __parity(__parity_)
94-
{}
95-
96-
inline _LIBCUDACXX_INLINE_VISIBILITY
97-
bool operator()() const
98-
{
99-
return barrier_try_wait_parity(__this, __parity);
100-
}
101-
};
102-
103-
template<class __Barrier>
104-
inline _LIBCUDACXX_INLINE_VISIBILITY
105-
void barrier_wait_parity(__Barrier const* __this, bool __parity)
106-
{
107-
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__Barrier>(__this, __parity));
108-
}
109-
11078
template<>
11179
class barrier<thread_scope_block, std::__empty_completion> : public __block_scope_barrier_base {
11280
using __barrier_base = std::__barrier_base<std::__empty_completion, (int)thread_scope_block>;
@@ -161,7 +129,7 @@ public:
161129
else
162130
#endif
163131
{
164-
return __barrier.__try_wait_parity(__parity);
132+
return __barrier.try_wait_parity(__parity);
165133
}
166134
}
167135

@@ -252,6 +220,18 @@ public:
252220
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(std::__barrier_poll_tester<barrier>(this, _CUDA_VSTD::move(__phase)));
253221
}
254222

223+
inline _LIBCUDACXX_INLINE_VISIBILITY
224+
bool try_wait_parity(bool __parity) const
225+
{
226+
return __try_wait_parity(__parity);
227+
}
228+
229+
inline _LIBCUDACXX_INLINE_VISIBILITY
230+
void wait_parity(bool __parity) const
231+
{
232+
_CUDA_VSTD::__libcpp_thread_poll_with_backoff(std::__barrier_poll_tester_parity<barrier>(this, __parity));
233+
}
234+
255235
inline _LIBCUDACXX_INLINE_VISIBILITY
256236
void arrive_and_wait()
257237
{

libcxx/include/barrier

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,24 @@ struct __barrier_poll_tester {
298298
}
299299
};
300300

301+
template<class __Barrier>
302+
struct __barrier_poll_tester_parity {
303+
__Barrier const* __this;
304+
bool __parity;
305+
306+
_LIBCUDACXX_INLINE_VISIBILITY
307+
__barrier_poll_tester_parity(__Barrier const* __this_, bool __parity_)
308+
: __this(__this_)
309+
, __parity(__parity_)
310+
{}
311+
312+
inline _LIBCUDACXX_INLINE_VISIBILITY
313+
bool operator()() const
314+
{
315+
return __this->try_wait_parity(__parity);
316+
}
317+
};
318+
301319
template<int _Sco>
302320
class __barrier_base<__empty_completion, _Sco> {
303321

@@ -325,6 +343,11 @@ private:
325343
uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
326344
return ((__current & __phase_bit) != __phase);
327345
}
346+
_LIBCUDACXX_INLINE_VISIBILITY
347+
bool __try_wait_parity(bool __parity) const
348+
{
349+
return __try_wait_phase(__parity ? __phase_bit : 0);
350+
}
328351

329352
public:
330353
__barrier_base() = default;
@@ -340,9 +363,9 @@ public:
340363
__barrier_base& operator=(__barrier_base const&) = delete;
341364

342365
_LIBCUDACXX_INLINE_VISIBILITY
343-
bool __try_wait_parity(bool __parity) const
366+
bool try_wait_parity(bool __parity) const
344367
{
345-
return __try_wait_phase(__parity ? __phase_bit : 0);
368+
return __try_wait_parity(__parity);
346369
}
347370
_LIBCUDACXX_INLINE_VISIBILITY
348371
bool try_wait(arrival_token __old) const
@@ -364,7 +387,12 @@ public:
364387
_LIBCUDACXX_INLINE_VISIBILITY
365388
void wait(arrival_token&& __phase) const
366389
{
367-
__libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base<__empty_completion, _Sco>>(this, _CUDA_VSTD::move(__phase)));
390+
__libcpp_thread_poll_with_backoff(__barrier_poll_tester<__barrier_base>(this, _CUDA_VSTD::move(__phase)));
391+
}
392+
_LIBCUDACXX_INLINE_VISIBILITY
393+
void wait_parity(bool __parity) const
394+
{
395+
__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity));
368396
}
369397
_LIBCUDACXX_INLINE_VISIBILITY
370398
void arrive_and_wait()

0 commit comments

Comments
 (0)