Skip to content

Commit 1358b70

Browse files
committed
[SYCL] Address problem with root_group.cpp
Signed-off-by: Hu, Peisen <[email protected]>
1 parent cd43cfb commit 1358b70

File tree

1 file changed

+26
-30
lines changed

1 file changed

+26
-30
lines changed

sycl/test-e2e/GroupAlgorithm/root_group.cpp

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// XFAIL: (opencl && !cpu && !accelerator)
33
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
44

5-
// RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
5+
// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
6+
// https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() and
7+
// remove the flag once the issue is resolved.
8+
// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
69
// RUN: %{run} %t.out
710

811
// Disabled temporarily while investigation into the failure is ongoing.
@@ -60,34 +63,6 @@ void testQueriesAndProperties() {
6063
check_max_num_work_group_sync(maxWGsWithLimits);
6164
}
6265

63-
template <typename T> struct TestKernel1 {
64-
T m_data;
65-
TestKernel1(T &data_) : m_data(data_) {}
66-
void operator()(sycl::nd_item<1> it) const {
67-
volatile float X = 1.0f;
68-
volatile float Y = 1.0f;
69-
auto root = it.ext_oneapi_get_root_group();
70-
m_data[root.get_local_id()] = root.get_local_id();
71-
sycl::group_barrier(root);
72-
// Delay half of the workgroups with extra work to check that the barrier
73-
// synchronizes the whole device.
74-
if (it.get_group(0) % 2 == 0) {
75-
X += sycl::sin(X);
76-
Y += sycl::cos(Y);
77-
}
78-
root = sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
79-
int sum = m_data[root.get_local_id()] +
80-
m_data[root.get_local_range() - root.get_local_id() - 1];
81-
sycl::group_barrier(root);
82-
m_data[root.get_local_id()] = sum;
83-
}
84-
auto get(sycl::ext::oneapi::experimental::properties_tag) {
85-
return sycl::ext::oneapi::experimental::properties{
86-
sycl::ext::oneapi::experimental::use_root_sync};
87-
;
88-
}
89-
};
90-
9166
void testRootGroup() {
9267
sycl::queue q;
9368
const auto bundle =
@@ -98,11 +73,32 @@ void testRootGroup() {
9873
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
9974
kernel_queue_specific::max_num_work_groups>(
10075
q, WorkGroupSize, 0);
76+
const auto props = sycl::ext::oneapi::experimental::properties{
77+
sycl::ext::oneapi::experimental::use_root_sync};
10178
sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
10279
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
10380
q.submit([&](sycl::handler &h) {
10481
sycl::accessor data{dataBuf, h};
105-
h.parallel_for<class RootGroupKernel>(range, TestKernel1(data));
82+
h.parallel_for<
83+
class RootGroupKernel>(range, props, [=](sycl::nd_item<1> it) {
84+
volatile float X = 1.0f;
85+
volatile float Y = 1.0f;
86+
auto root = it.ext_oneapi_get_root_group();
87+
data[root.get_local_id()] = root.get_local_id();
88+
sycl::group_barrier(root);
89+
// Delay half of the workgroups with extra work to check that the barrier
90+
// synchronizes the whole device.
91+
if (it.get_group(0) % 2 == 0) {
92+
X += sycl::sin(X);
93+
Y += sycl::cos(Y);
94+
}
95+
root =
96+
sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
97+
int sum = data[root.get_local_id()] +
98+
data[root.get_local_range() - root.get_local_id() - 1];
99+
sycl::group_barrier(root);
100+
data[root.get_local_id()] = sum;
101+
});
106102
});
107103
sycl::host_accessor data{dataBuf};
108104
const int workItemCount = static_cast<int>(range.get_global_range().size());

0 commit comments

Comments
 (0)