[SYCL] Fix error irrelevant to use_root_sync

HPS-1 · HPS-1 · commit 79e33ee24933 · 2025-03-31T09:59:15.000-07:00
Signed-off-by: Hu, Peisen &lt;peisen.hu@intel.com&gt;
diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -2,7 +2,10 @@
 // XFAIL: (opencl && !cpu && !accelerator)
 // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
 
-// RUN: %{build} -I . -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
+// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
+// https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() amd
+// remove the flag once the issue is resolved.
+// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
 // RUN: %{run} %t.out
 
 // Disabled temporarily while investigation into the failure is ongoing.
@@ -60,6 +63,34 @@ void testQueriesAndProperties() {
   check_max_num_work_group_sync(maxWGsWithLimits);
 }
 
+template <typename T> struct TestKernel1 {
+  T m_data;
+  TestKernel1(T &data_) : m_data(data_) {}
+  void operator()(sycl::nd_item<1> it) const {
+    volatile float X = 1.0f;
+    volatile float Y = 1.0f;
+    auto root = it.ext_oneapi_get_root_group();
+    m_data[root.get_local_id()] = root.get_local_id();
+    sycl::group_barrier(root);
+    // Delay half of the workgroups with extra work to check that the barrier
+    // synchronizes the whole device.
+    if (it.get_group(0) % 2 == 0) {
+      X += sycl::sin(X);
+      Y += sycl::cos(Y);
+    }
+    root = sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
+    int sum = m_data[root.get_local_id()] +
+              m_data[root.get_local_range() - root.get_local_id() - 1];
+    sycl::group_barrier(root);
+    m_data[root.get_local_id()] = sum;
+  }
+  auto get(sycl::ext::oneapi::experimental::properties_tag) const {
+    return sycl::ext::oneapi::experimental::properties{
+        sycl::ext::oneapi::experimental::use_root_sync};
+    ;
+  }
+};
+
 void testRootGroup() {
   sycl::queue q;
   const auto bundle =
@@ -70,41 +101,13 @@ void testRootGroup() {
           .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
                                    kernel_queue_specific::max_num_work_groups>(
               q, WorkGroupSize, 0);
+  const auto props = sycl::ext::oneapi::experimental::properties{
+      sycl::ext::oneapi::experimental::use_root_sync};
   sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
   const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
-  struct TestKernel1 {
-    sycl::buffer<int> *m_dataBuf;
-    sycl::handler *m_h;
-    TestKernel1(sycl::buffer<int> *dataBuf, sycl::handler *h)
-        : m_dataBuf(dataBuf), m_h(h) {}
-    void operator()(sycl::nd_item<1> it) const {
-      sycl::accessor data{*m_dataBuf, *m_h};
-      volatile float X = 1.0f;
-      volatile float Y = 1.0f;
-      auto root = it.ext_oneapi_get_root_group();
-      data[root.get_local_id()] = root.get_local_id();
-      sycl::group_barrier(root);
-      // Delay half of the workgroups with extra work to check that the barrier
-      // synchronizes the whole device.
-      if (it.get_group(0) % 2 == 0) {
-        X += sycl::sin(X);
-        Y += sycl::cos(Y);
-      }
-      root =
-          sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
-      int sum = data[root.get_local_id()] +
-                data[root.get_local_range() - root.get_local_id() - 1];
-      sycl::group_barrier(root);
-      data[root.get_local_id()] = sum;
-    }
-    auto get(sycl::ext::oneapi::experimental::properties_tag) const {
-      return sycl::ext::oneapi::experimental::properties{
-          sycl::ext::oneapi::experimental::use_root_sync};
-      ;
-    }
-  };
   q.submit([&](sycl::handler &h) {
-    h.parallel_for<class RootGroupKernel>(range, TestKernel1(&dataBuf, &h));
+    sycl::accessor data{dataBuf, h};
+    h.parallel_for<class RootGroupKernel>(range, TestKernel1(data));
   });
   sycl::host_accessor data{dataBuf};
   const int workItemCount = static_cast<int>(range.get_global_range().size());