Skip to content

Commit 79e33ee

Browse files
committed
[SYCL] Fix error irrelevant to use_root_sync
Signed-off-by: Hu, Peisen <[email protected]>
1 parent ba746b3 commit 79e33ee

File tree

1 file changed

+36
-33
lines changed

1 file changed

+36
-33
lines changed

sycl/test-e2e/GroupAlgorithm/root_group.cpp

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// XFAIL: (opencl && !cpu && !accelerator)
33
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
44

5-
// RUN: %{build} -I . -o %t.out %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
5+
// TODO: Currently using the -Wno-deprecated-declarations flag due to issue
6+
// https://github.com/intel/llvm/issues/16451. Rewrite testRootGroup() amd
7+
// remove the flag once the issue is resolved.
8+
// RUN: %{build} -I . -o %t.out -Wno-deprecated-declarations %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
69
// RUN: %{run} %t.out
710

811
// Disabled temporarily while investigation into the failure is ongoing.
@@ -60,6 +63,34 @@ void testQueriesAndProperties() {
6063
check_max_num_work_group_sync(maxWGsWithLimits);
6164
}
6265

66+
template <typename T> struct TestKernel1 {
67+
T m_data;
68+
TestKernel1(T &data_) : m_data(data_) {}
69+
void operator()(sycl::nd_item<1> it) const {
70+
volatile float X = 1.0f;
71+
volatile float Y = 1.0f;
72+
auto root = it.ext_oneapi_get_root_group();
73+
m_data[root.get_local_id()] = root.get_local_id();
74+
sycl::group_barrier(root);
75+
// Delay half of the workgroups with extra work to check that the barrier
76+
// synchronizes the whole device.
77+
if (it.get_group(0) % 2 == 0) {
78+
X += sycl::sin(X);
79+
Y += sycl::cos(Y);
80+
}
81+
root = sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
82+
int sum = m_data[root.get_local_id()] +
83+
m_data[root.get_local_range() - root.get_local_id() - 1];
84+
sycl::group_barrier(root);
85+
m_data[root.get_local_id()] = sum;
86+
}
87+
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
88+
return sycl::ext::oneapi::experimental::properties{
89+
sycl::ext::oneapi::experimental::use_root_sync};
90+
;
91+
}
92+
};
93+
6394
void testRootGroup() {
6495
sycl::queue q;
6596
const auto bundle =
@@ -70,41 +101,13 @@ void testRootGroup() {
70101
.ext_oneapi_get_info<sycl::ext::oneapi::experimental::info::
71102
kernel_queue_specific::max_num_work_groups>(
72103
q, WorkGroupSize, 0);
104+
const auto props = sycl::ext::oneapi::experimental::properties{
105+
sycl::ext::oneapi::experimental::use_root_sync};
73106
sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
74107
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
75-
struct TestKernel1 {
76-
sycl::buffer<int> *m_dataBuf;
77-
sycl::handler *m_h;
78-
TestKernel1(sycl::buffer<int> *dataBuf, sycl::handler *h)
79-
: m_dataBuf(dataBuf), m_h(h) {}
80-
void operator()(sycl::nd_item<1> it) const {
81-
sycl::accessor data{*m_dataBuf, *m_h};
82-
volatile float X = 1.0f;
83-
volatile float Y = 1.0f;
84-
auto root = it.ext_oneapi_get_root_group();
85-
data[root.get_local_id()] = root.get_local_id();
86-
sycl::group_barrier(root);
87-
// Delay half of the workgroups with extra work to check that the barrier
88-
// synchronizes the whole device.
89-
if (it.get_group(0) % 2 == 0) {
90-
X += sycl::sin(X);
91-
Y += sycl::cos(Y);
92-
}
93-
root =
94-
sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
95-
int sum = data[root.get_local_id()] +
96-
data[root.get_local_range() - root.get_local_id() - 1];
97-
sycl::group_barrier(root);
98-
data[root.get_local_id()] = sum;
99-
}
100-
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
101-
return sycl::ext::oneapi::experimental::properties{
102-
sycl::ext::oneapi::experimental::use_root_sync};
103-
;
104-
}
105-
};
106108
q.submit([&](sycl::handler &h) {
107-
h.parallel_for<class RootGroupKernel>(range, TestKernel1(&dataBuf, &h));
109+
sycl::accessor data{dataBuf, h};
110+
h.parallel_for<class RootGroupKernel>(range, TestKernel1(data));
108111
});
109112
sycl::host_accessor data{dataBuf};
110113
const int workItemCount = static_cast<int>(range.get_global_range().size());

0 commit comments

Comments
 (0)