[SYCL] Address problem with root_group.cpp

HPS-1 · HPS-1 · commit 1358b706f7a8 · 2024-12-20T11:18:36.000-08:00
Signed-off-by: Hu, Peisen &lt;peisen.hu@intel.com&gt;
diff --git a/sycl/test-e2e/GroupAlgorithm/root_group.cpp b/sycl/test-e2e/GroupAlgorithm/root_group.cpp
@@ -2,7 +2,10 @@
 // XFAIL: (opencl && !cpu && !accelerator)
 // XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
 
-// RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
+// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
+// https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() amd
+// remove the flag once the issue is resolved.
+// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
 // RUN: %{run} %t.out
 
 // Disabled temporarily while investigation into the failure is ongoing.
@@ -60,34 +63,6 @@ void testQueriesAndProperties() {
   check_max_num_work_group_sync(maxWGsWithLimits);
 }
 
-template <typename T> struct TestKernel1 {
-  T m_data;
-  TestKernel1(T &data_) : m_data(data_) {}
-  void operator()(sycl::nd_item<1> it) const {
-    volatile float X = 1.0f;
-    volatile float Y = 1.0f;
-    auto root = it.ext_oneapi_get_root_group();
-    m_data[root.get_local_id()] = root.get_local_id();
-    sycl::group_barrier(root);
-    // Delay half of the workgroups with extra work to check that the barrier
-    // synchronizes the whole device.
-    if (it.get_group(0) % 2 == 0) {
-      X += sycl::sin(X);
-      Y += sycl::cos(Y);
-    }
-    root = sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
-    int sum = m_data[root.get_local_id()] +
-              m_data[root.get_local_range() - root.get_local_id() - 1];
-    sycl::group_barrier(root);
-    m_data[root.get_local_id()] = sum;
-  }
-  auto get(sycl::ext::oneapi::experimental::properties_tag) {
-    return sycl::ext::oneapi::experimental::properties{
-        sycl::ext::oneapi::experimental::use_root_sync};
-    ;
-  }
-};
-
 void testRootGroup() {
   sycl::queue q;
   const auto bundle =
@@ -98,11 +73,32 @@ void testRootGroup() {
           .ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
                                    kernel_queue_specific::max_num_work_groups>(
               q, WorkGroupSize, 0);
+  const auto props = sycl::ext::oneapi::experimental::properties{
+      sycl::ext::oneapi::experimental::use_root_sync};
   sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
   const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
   q.submit([&](sycl::handler &h) {
     sycl::accessor data{dataBuf, h};
-    h.parallel_for<class RootGroupKernel>(range, TestKernel1(data));
+    h.parallel_for<
+        class RootGroupKernel>(range, props, [=](sycl::nd_item<1> it) {
+      volatile float X = 1.0f;
+      volatile float Y = 1.0f;
+      auto root = it.ext_oneapi_get_root_group();
+      data[root.get_local_id()] = root.get_local_id();
+      sycl::group_barrier(root);
+      // Delay half of the workgroups with extra work to check that the barrier
+      // synchronizes the whole device.
+      if (it.get_group(0) % 2 == 0) {
+        X += sycl::sin(X);
+        Y += sycl::cos(Y);
+      }
+      root =
+          sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
+      int sum = data[root.get_local_id()] +
+                data[root.get_local_range() - root.get_local_id() - 1];
+      sycl::group_barrier(root);
+      data[root.get_local_id()] = sum;
+    });
   });
   sycl::host_accessor data{dataBuf};
   const int workItemCount = static_cast<int>(range.get_global_range().size());