Add a small parity wait test

wmaxey · wmaxey · commit 1464783cccad · 2021-07-27T15:07:31.000-07:00
diff --git a/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp b/.upstream-tests/test/heterogeneous/barrier_parity.pass.cpp
@@ -0,0 +1,104 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: nvrtc, pre-sm-70
+
+// uncomment for a really verbose output detailing what test steps are being launched
+// #define DEBUG_TESTERS
+
+#include "helpers.h"
+
+#include <atomic>
+#include <cuda/barrier>
+
+template<typename Barrier>
+struct barrier_and_token
+{
+    using barrier_t = Barrier;
+    using token_t = typename barrier_t::arrival_token;
+
+    barrier_t barrier;
+    cuda::std::atomic<bool> parity_waiting{false};
+
+    template<typename ...Args>
+    __host__ __device__
+    barrier_and_token(Args && ...args) : barrier{ cuda::std::forward<Args>(args)... }
+    {
+    }
+};
+
+struct barrier_arrive_and_wait
+{
+    using async = cuda::std::true_type;
+
+    template<typename Data>
+    __host__ __device__
+    static void perform(Data & data)
+    {
+        while (data.parity_waiting.load(cuda::std::memory_order_acquire) == false)
+        {
+            data.parity_waiting.wait(false);
+        }
+        data.barrier.arrive_and_wait();
+    }
+};
+
+template <bool Phase>
+struct barrier_arrive_parity_wait
+{
+    using async = cuda::std::true_type;
+
+    template<typename Data>
+    __host__ __device__
+    static void perform(Data & data)
+    {
+        data.parity_waiting.store(true, cuda::std::memory_order_release);
+        data.parity_waiting.notify_all();
+        cuda::barrier_wait_parity(&data.barrier, Phase);
+    }
+};
+
+struct clear_token
+{
+    template<typename Data>
+    __host__ __device__
+    static void perform(Data & data)
+    {
+        data.parity_waiting.store(false, cuda::std::memory_order_release);
+    }
+};
+
+using aw_aw_pw = performer_list<
+    barrier_arrive_and_wait,
+    barrier_arrive_and_wait,
+    barrier_arrive_parity_wait<false>,
+    async_tester_fence,
+    clear_token
+>;
+
+void kernel_invoker()
+{
+    validate_not_movable<
+        barrier_and_token<cuda::std::barrier<>>,
+        aw_aw_pw
+      >(2);
+    validate_not_movable<
+        barrier_and_token<cuda::barrier<cuda::thread_scope_system>>,
+        aw_aw_pw
+      >(2);
+}
+
+int main(int arg, char ** argv)
+{
+#ifndef __CUDA_ARCH__
+    kernel_invoker();
+#endif
+
+    return 0;
+}
+