[SYCL] Attempt to resolve synchronization issue

HPS-1 · HPS-1 · commit bcc6eb43edcf · 2024-12-15T17:07:25.000-08:00
Signed-off-by: Hu, Peisen &lt;peisen.hu@intel.com&gt;
diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -2,7 +2,7 @@
 // XFAIL: (opencl && !cpu && !accelerator)
 // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
 
-// RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
+// RUN: %{build} -Wno-deprecated-declarations -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
 // RUN: %{run} %t.out
 
 // Disabled temporarily while investigation into the failure is ongoing.
@@ -73,16 +73,13 @@ void testRootGroup() {
   sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
   const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
   struct TestKernel1 {
-    sycl::buffer<int> *m_dataBuf;
-    sycl::handler *m_h;
-    TestKernel1(sycl::buffer<int> *dataBuf, sycl::handler *h)
-        : m_dataBuf(dataBuf), m_h(h) {}
+    sycl::accessor *m_data;
+    TestKernel1(sycl::accessor *data) : m_data(data) {}
     void operator()(sycl::nd_item<1> it) const {
-      sycl::accessor data{*m_dataBuf, *m_h};
       volatile float X = 1.0f;
       volatile float Y = 1.0f;
       auto root = it.ext_oneapi_get_root_group();
-      data[root.get_local_id()] = root.get_local_id();
+      *m_data[root.get_local_id()] = root.get_local_id();
       sycl::group_barrier(root);
       // Delay half of the workgroups with extra work to check that the barrier
       // synchronizes the whole device.
@@ -92,10 +89,10 @@ void testRootGroup() {
       }
       root =
           sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
-      int sum = data[root.get_local_id()] +
-                data[root.get_local_range() - root.get_local_id() - 1];
+      int sum = *m_data[root.get_local_id()] +
+                *m_data[root.get_local_range() - root.get_local_id() - 1];
       sycl::group_barrier(root);
-      data[root.get_local_id()] = sum;
+      *m_data[root.get_local_id()] = sum;
     }
     auto get(sycl::ext::oneapi::experimental::properties_tag) {
       return sycl::ext::oneapi::experimental::properties{
@@ -104,7 +101,8 @@ void testRootGroup() {
     }
   };
   q.submit([&](sycl::handler &h) {
-    h.parallel_for<class RootGroupKernel>(range, TestKernel1(&dataBuf, &h));
+    sycl::accessor data{dataBuf, h};
+    h.parallel_for<class RootGroupKernel>(range, TestKernel1(&data));
   });
   sycl::host_accessor data{dataBuf};
   const int workItemCount = static_cast<int>(range.get_global_range().size());
@@ -127,25 +125,24 @@ void testRootGroupFunctions() {
   sycl::buffer<bool> testResultsBuf{sycl::range{testCount}};
   const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
   struct TestKernel2 {
-    sycl::buffer<bool> *m_testResultsBuf;
-    sycl::handler *m_h;
-    TestKernel2(sycl::buffer<bool> *testResultsBuf, sycl::handler *h)
-        : m_testResultsBuf(testResultsBuf), m_h(h) {}
+    sycl::accessor *m_testResults;
+    TestKernel2(sycl::accessor *testResults) : m_testResults(testResults) {}
     void operator()(sycl::nd_item<1> it) const {
-      sycl::accessor testResults{*m_testResultsBuf, *m_h};
       const auto root = it.ext_oneapi_get_root_group();
       if (root.leader() || root.get_local_id() == 3) {
-        testResults[0] = root.get_group_id() == sycl::id<1>(0);
-        testResults[1] = root.leader() ? root.get_local_id() == sycl::id<1>(0)
-                                       : root.get_local_id() == sycl::id<1>(3);
-        testResults[2] = root.get_group_range() == sycl::range<1>(1);
-        testResults[3] = root.get_local_range() == it.get_global_range();
-        testResults[4] = root.get_max_local_range() == root.get_local_range();
-        testResults[5] = root.get_group_linear_id() == 0;
-        testResults[6] =
+        *m_testResults[0] = root.get_group_id() == sycl::id<1>(0);
+        *m_testResults[1] = root.leader()
+                                ? root.get_local_id() == sycl::id<1>(0)
+                                : root.get_local_id() == sycl::id<1>(3);
+        *m_testResults[2] = root.get_group_range() == sycl::range<1>(1);
+        *m_testResults[3] = root.get_local_range() == it.get_global_range();
+        *m_testResults[4] =
+            root.get_max_local_range() == root.get_local_range();
+        *m_testResults[5] = root.get_group_linear_id() == 0;
+        *m_testResults[6] =
             root.get_local_linear_id() == root.get_local_id().get(0);
-        testResults[7] = root.get_group_linear_range() == 1;
-        testResults[8] =
+        *m_testResults[7] = root.get_group_linear_range() == 1;
+        *m_testResults[8] =
             root.get_local_linear_range() == root.get_local_range().size();
       }
     }
@@ -156,8 +153,9 @@ void testRootGroupFunctions() {
   };
 
   q.submit([&](sycl::handler &h) {
-    h.parallel_for<class RootGroupFunctionsKernel>(
-        range, TestKernel2(&testResultsBuf, &h));
+    sycl::accessor testResults{testResultsBuf, h};
+    h.parallel_for<class RootGroupFunctionsKernel>(range,
+                                                   TestKernel2(&testResults));
   });
   sycl::host_accessor testResults{testResultsBuf};
   for (int i = 0; i < testCount; i++) {
diff --git a/sycl/test-e2e/Properties/cache_config.cpp b/sycl/test-e2e/Properties/cache_config.cpp
@@ -51,7 +51,7 @@ int main() {
   sycl::ext::oneapi::experimental::properties properties{
       cache_config(large_slm)};
 
-  // CHECK: parallel_for with sycl::range and KernelFunctor
+  // CHECK: parallel_for with sycl::range
   // CHECK: ZE ---> zeKernelSetCacheConfig
   std::cout << "parallel_for with sycl::range" << std::endl;
   q.parallel_for(range<2>{16, 16}, RangeKernelFunctor{}).wait();