22// XFAIL: (opencl && !cpu && !accelerator)
33// XFAIL-TRACKER: https://github.com/intel/llvm/issues/14641
44
5- // RUN: %{build} -Wno-deprecated-declarations - I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
5+ // RUN: %{build} -I . -o %t.out %if any-device-is-cuda %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 %}
66// RUN: %{run} %t.out
77
88// Disabled temporarily while investigation into the failure is ongoing.
@@ -60,6 +60,34 @@ void testQueriesAndProperties() {
6060 check_max_num_work_group_sync (maxWGsWithLimits);
6161}
6262
63+ template <typename T> class TestKernel1 {
64+ T *m_data;
65+ TestKernel1 (T *data) : m_data(data) {}
66+ void operator ()(sycl::nd_item<1 > it) const {
67+ volatile float X = 1 .0f ;
68+ volatile float Y = 1 .0f ;
69+ auto root = it.ext_oneapi_get_root_group ();
70+ *m_data[root.get_local_id ()] = root.get_local_id ();
71+ sycl::group_barrier (root);
72+ // Delay half of the workgroups with extra work to check that the barrier
73+ // synchronizes the whole device.
74+ if (it.get_group (0 ) % 2 == 0 ) {
75+ X += sycl::sin (X);
76+ Y += sycl::cos (Y);
77+ }
78+ root = sycl::ext::oneapi::experimental::this_work_item::get_root_group<1 >();
79+ int sum = *m_data[root.get_local_id ()] +
80+ *m_data[root.get_local_range () - root.get_local_id () - 1 ];
81+ sycl::group_barrier (root);
82+ *m_data[root.get_local_id ()] = sum;
83+ }
84+ auto get (sycl::ext::oneapi::experimental::properties_tag) {
85+ return sycl::ext::oneapi::experimental::properties{
86+ sycl::ext::oneapi::experimental::use_root_sync};
87+ ;
88+ }
89+ };
90+
6391void testRootGroup () {
6492 sycl::queue q;
6593 const auto bundle =
@@ -72,34 +100,6 @@ void testRootGroup() {
72100 q, WorkGroupSize, 0 );
73101 sycl::buffer<int > dataBuf{sycl::range{maxWGs * WorkGroupSize}};
74102 const auto range = sycl::nd_range<1 >{maxWGs * WorkGroupSize, WorkGroupSize};
75- struct TestKernel1 {
76- sycl::accessor *m_data;
77- TestKernel1 (sycl::accessor *data) : m_data(data) {}
78- void operator ()(sycl::nd_item<1 > it) const {
79- volatile float X = 1 .0f ;
80- volatile float Y = 1 .0f ;
81- auto root = it.ext_oneapi_get_root_group ();
82- *m_data[root.get_local_id ()] = root.get_local_id ();
83- sycl::group_barrier (root);
84- // Delay half of the workgroups with extra work to check that the barrier
85- // synchronizes the whole device.
86- if (it.get_group (0 ) % 2 == 0 ) {
87- X += sycl::sin (X);
88- Y += sycl::cos (Y);
89- }
90- root =
91- sycl::ext::oneapi::experimental::this_work_item::get_root_group<1 >();
92- int sum = *m_data[root.get_local_id ()] +
93- *m_data[root.get_local_range () - root.get_local_id () - 1 ];
94- sycl::group_barrier (root);
95- *m_data[root.get_local_id ()] = sum;
96- }
97- auto get (sycl::ext::oneapi::experimental::properties_tag) {
98- return sycl::ext::oneapi::experimental::properties{
99- sycl::ext::oneapi::experimental::use_root_sync};
100- ;
101- }
102- };
103103 q.submit ([&](sycl::handler &h) {
104104 sycl::accessor data{dataBuf, h};
105105 h.parallel_for <class RootGroupKernel >(range, TestKernel1 (&data));
@@ -111,6 +111,34 @@ void testRootGroup() {
111111 }
112112}
113113
114+ template <typename T> class TestKernel2 {
115+ T *m_testResults;
116+ void operator ()(sycl::nd_item<1 > it) const {
117+ const auto root = it.ext_oneapi_get_root_group ();
118+ if (root.leader () || root.get_local_id () == 3 ) {
119+ *m_testResults[0 ] = root.get_group_id () == sycl::id<1 >(0 );
120+ *m_testResults[1 ] = root.leader () ? root.get_local_id () == sycl::id<1 >(0 )
121+ : root.get_local_id () == sycl::id<1 >(3 );
122+ *m_testResults[2 ] = root.get_group_range () == sycl::range<1 >(1 );
123+ *m_testResults[3 ] = root.get_local_range () == it.get_global_range ();
124+ *m_testResults[4 ] = root.get_max_local_range () == root.get_local_range ();
125+ *m_testResults[5 ] = root.get_group_linear_id () == 0 ;
126+ *m_testResults[6 ] =
127+ root.get_local_linear_id () == root.get_local_id ().get (0 );
128+ *m_testResults[7 ] = root.get_group_linear_range () == 1 ;
129+ *m_testResults[8 ] =
130+ root.get_local_linear_range () == root.get_local_range ().size ();
131+ }
132+ }
133+ auto get (sycl::ext::oneapi::experimental::properties_tag) {
134+ return sycl::ext::oneapi::experimental::properties{
135+ sycl::ext::oneapi::experimental::use_root_sync};
136+ }
137+
138+ public:
139+ TestKernel2 (T *testResults) : m_testResults(testResults) {}
140+ };
141+
114142void testRootGroupFunctions () {
115143 sycl::queue q;
116144 const auto bundle =
@@ -124,34 +152,6 @@ void testRootGroupFunctions() {
124152 constexpr int testCount = 9 ;
125153 sycl::buffer<bool > testResultsBuf{sycl::range{testCount}};
126154 const auto range = sycl::nd_range<1 >{maxWGs * WorkGroupSize, WorkGroupSize};
127- struct TestKernel2 {
128- sycl::accessor *m_testResults;
129- TestKernel2 (sycl::accessor *testResults) : m_testResults(testResults) {}
130- void operator ()(sycl::nd_item<1 > it) const {
131- const auto root = it.ext_oneapi_get_root_group ();
132- if (root.leader () || root.get_local_id () == 3 ) {
133- *m_testResults[0 ] = root.get_group_id () == sycl::id<1 >(0 );
134- *m_testResults[1 ] = root.leader ()
135- ? root.get_local_id () == sycl::id<1 >(0 )
136- : root.get_local_id () == sycl::id<1 >(3 );
137- *m_testResults[2 ] = root.get_group_range () == sycl::range<1 >(1 );
138- *m_testResults[3 ] = root.get_local_range () == it.get_global_range ();
139- *m_testResults[4 ] =
140- root.get_max_local_range () == root.get_local_range ();
141- *m_testResults[5 ] = root.get_group_linear_id () == 0 ;
142- *m_testResults[6 ] =
143- root.get_local_linear_id () == root.get_local_id ().get (0 );
144- *m_testResults[7 ] = root.get_group_linear_range () == 1 ;
145- *m_testResults[8 ] =
146- root.get_local_linear_range () == root.get_local_range ().size ();
147- }
148- }
149- auto get (sycl::ext::oneapi::experimental::properties_tag) {
150- return sycl::ext::oneapi::experimental::properties{
151- sycl::ext::oneapi::experimental::use_root_sync};
152- }
153- };
154-
155155 q.submit ([&](sycl::handler &h) {
156156 sycl::accessor testResults{testResultsBuf, h};
157157 h.parallel_for <class RootGroupFunctionsKernel >(range,
0 commit comments