Skip to content

Commit bcc6eb4

Browse files
committed
[SYCL] Attempt to resolve synchronization issue
Signed-off-by: Hu, Peisen <[email protected]>
1 parent 31b8aaf commit bcc6eb4

File tree

2 files changed: 27 additions and 29 deletions.

sycl/test-e2e/GroupAlgorithm/root_group.cpp

Lines changed: 26 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
// XFAIL: (opencl && !cpu && !accelerator)
33
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
44

5-
// RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
5+
// RUN: %{build} -Wno-deprecated-declarations -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
66
// RUN: %{run} %t.out
77

88
// Disabled temporarily while investigation into the failure is ongoing.
@@ -73,16 +73,13 @@ void testRootGroup() {
7373
sycl::buffer<int> dataBuf{sycl::range{maxWGs * WorkGroupSize}};
7474
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
7575
struct TestKernel1 {
76-
sycl::buffer<int> *m_dataBuf;
77-
sycl::handler *m_h;
78-
TestKernel1(sycl::buffer<int> *dataBuf, sycl::handler *h)
79-
: m_dataBuf(dataBuf), m_h(h) {}
76+
sycl::accessor *m_data;
77+
TestKernel1(sycl::accessor *data) : m_data(data) {}
8078
void operator()(sycl::nd_item<1> it) const {
81-
sycl::accessor data{*m_dataBuf, *m_h};
8279
volatile float X = 1.0f;
8380
volatile float Y = 1.0f;
8481
auto root = it.ext_oneapi_get_root_group();
85-
data[root.get_local_id()] = root.get_local_id();
82+
*m_data[root.get_local_id()] = root.get_local_id();
8683
sycl::group_barrier(root);
8784
// Delay half of the workgroups with extra work to check that the barrier
8885
// synchronizes the whole device.
@@ -92,10 +89,10 @@ void testRootGroup() {
9289
}
9390
root =
9491
sycl::ext::oneapi::experimental::this_work_item::get_root_group<1>();
95-
int sum = data[root.get_local_id()] +
96-
data[root.get_local_range() - root.get_local_id() - 1];
92+
int sum = *m_data[root.get_local_id()] +
93+
*m_data[root.get_local_range() - root.get_local_id() - 1];
9794
sycl::group_barrier(root);
98-
data[root.get_local_id()] = sum;
95+
*m_data[root.get_local_id()] = sum;
9996
}
10097
auto get(sycl::ext::oneapi::experimental::properties_tag) {
10198
return sycl::ext::oneapi::experimental::properties{
@@ -104,7 +101,8 @@ void testRootGroup() {
104101
}
105102
};
106103
q.submit([&](sycl::handler &h) {
107-
h.parallel_for<class RootGroupKernel>(range, TestKernel1(&dataBuf, &h));
104+
sycl::accessor data{dataBuf, h};
105+
h.parallel_for<class RootGroupKernel>(range, TestKernel1(&data));
108106
});
109107
sycl::host_accessor data{dataBuf};
110108
const int workItemCount = static_cast<int>(range.get_global_range().size());
@@ -127,25 +125,24 @@ void testRootGroupFunctions() {
127125
sycl::buffer<bool> testResultsBuf{sycl::range{testCount}};
128126
const auto range = sycl::nd_range<1>{maxWGs * WorkGroupSize, WorkGroupSize};
129127
struct TestKernel2 {
130-
sycl::buffer<bool> *m_testResultsBuf;
131-
sycl::handler *m_h;
132-
TestKernel2(sycl::buffer<bool> *testResultsBuf, sycl::handler *h)
133-
: m_testResultsBuf(testResultsBuf), m_h(h) {}
128+
sycl::accessor *m_testResults;
129+
TestKernel2(sycl::accessor *testResults) : m_testResults(testResults) {}
134130
void operator()(sycl::nd_item<1> it) const {
135-
sycl::accessor testResults{*m_testResultsBuf, *m_h};
136131
const auto root = it.ext_oneapi_get_root_group();
137132
if (root.leader() || root.get_local_id() == 3) {
138-
testResults[0] = root.get_group_id() == sycl::id<1>(0);
139-
testResults[1] = root.leader() ? root.get_local_id() == sycl::id<1>(0)
140-
: root.get_local_id() == sycl::id<1>(3);
141-
testResults[2] = root.get_group_range() == sycl::range<1>(1);
142-
testResults[3] = root.get_local_range() == it.get_global_range();
143-
testResults[4] = root.get_max_local_range() == root.get_local_range();
144-
testResults[5] = root.get_group_linear_id() == 0;
145-
testResults[6] =
133+
*m_testResults[0] = root.get_group_id() == sycl::id<1>(0);
134+
*m_testResults[1] = root.leader()
135+
? root.get_local_id() == sycl::id<1>(0)
136+
: root.get_local_id() == sycl::id<1>(3);
137+
*m_testResults[2] = root.get_group_range() == sycl::range<1>(1);
138+
*m_testResults[3] = root.get_local_range() == it.get_global_range();
139+
*m_testResults[4] =
140+
root.get_max_local_range() == root.get_local_range();
141+
*m_testResults[5] = root.get_group_linear_id() == 0;
142+
*m_testResults[6] =
146143
root.get_local_linear_id() == root.get_local_id().get(0);
147-
testResults[7] = root.get_group_linear_range() == 1;
148-
testResults[8] =
144+
*m_testResults[7] = root.get_group_linear_range() == 1;
145+
*m_testResults[8] =
149146
root.get_local_linear_range() == root.get_local_range().size();
150147
}
151148
}
@@ -156,8 +153,9 @@ void testRootGroupFunctions() {
156153
};
157154

158155
q.submit([&](sycl::handler &h) {
159-
h.parallel_for<class RootGroupFunctionsKernel>(
160-
range, TestKernel2(&testResultsBuf, &h));
156+
sycl::accessor testResults{testResultsBuf, h};
157+
h.parallel_for<class RootGroupFunctionsKernel>(range,
158+
TestKernel2(&testResults));
161159
});
162160
sycl::host_accessor testResults{testResultsBuf};
163161
for (int i = 0; i < testCount; i++) {

sycl/test-e2e/Properties/cache_config.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ int main() {
5151
sycl::ext::oneapi::experimental::properties properties{
5252
cache_config(large_slm)};
5353

54-
// CHECK: parallel_for with sycl::range and KernelFunctor
54+
// CHECK: parallel_for with sycl::range
5555
// CHECK: ZE ---> zeKernelSetCacheConfig
5656
std::cout << "parallel_for with sycl::range" << std::endl;
5757
q.parallel_for(range<2>{16, 16}, RangeKernelFunctor{}).wait();

0 commit comments

Comments (0)