1- // RUN: %{build} -o %{ t.out}
2- // RUN: %{run} %{ t.out}
1+ // RUN: %{build} -o %t.out
2+ // RUN: %{run} %t.out
33
44#include < sycl/detail/core.hpp>
55#include < sycl/ext/oneapi/experimental/work_group_memory.hpp>
99namespace syclexp = sycl::ext::oneapi::experimental;
1010
1111// This test performs a swap of two scalars/arrays inside a kernel using a work_group_memory object as a temporary buffer.
12- // The test is done for scalars types, bounded and unbounded arrays. After the kernel finishes, it is verified on the host side
12+ // The test is done for scalar types and bounded arrays. After the kernel finishes, it is verified on the host side
1313// that the swap worked.
14+
15+ // One important note is that for unbounded arrays, the feature is unstable. Specifically, the code may or may not compile
16+ // when kernels reference work group memory objects that have been constructed with the unbounded array type. This is due to a
17+ // limitation of SPIRV where it does not allow arrays of length zero. For example, an unbounded array may be translated to an array of length zero in
18+ // LLVM IR and during the LLVM IR -> SPIRV translation phase, the translator rejects all arrays of length zero because they are invalid constructs in SPIRV.
19+ // As a result of this, unbounded arrays do not appear in this test. They do appear in the sanity test though in this directory because there the unbounded arrays are used with concrete subscript indices which seems to work, for now at least.
1420
1521template < typename T>
1622void swap_scalar (T& a, T& b) {
@@ -30,11 +36,69 @@ void swap_scalar(T& a, T& b) {
3036 acc_b[0 ] = temp;
3137 });});
3238 }
33- assert (a == old_b && b == old_a && " Swap assertion failed" );
39+ assert (a == old_b && b == old_a && " Incorrect swap!" );
40+ // swap again but this time using two temporaries. The first temporary will be used to save the value of a
41+ // and the second temporay will be default-constructed and then copy-assigned from the first temporary
42+ // to be then used to write that value to b.
43+ {
44+ sycl::buffer<T, 1 > buf_a{ &a, 1 };
45+ sycl::buffer<T, 1 > buf_b{ &b, 1 };
46+ q.submit ([&](sycl::handler &cgh) {
47+ sycl::accessor acc_a{ buf_a, cgh };
48+ sycl::accessor acc_b { buf_b, cgh };
49+ syclexp::work_group_memory<T> temp{ cgh };
50+ syclexp::work_group_memory<T> temp2;
51+ cgh.single_task ([=]() {
52+ temp2 = temp; // temp and temp2 have the same underlying data
53+ temp = acc_a[0 ];
54+ acc_a[0 ] = acc_b[0 ];
55+ acc_b[0 ] = temp2; // safe to use temp2
56+ });});
57+ }
58+ // Two swaps same as no swaps
59+ assert (a == old_a && b == old_b && " Incorrect swap!" );
60+ // Initialize a second temporary and instead of assigning the first temporary to it, assign only the value of the data
61+ // of the first temporary so that unlike above, the two temporaries will not be aliasing the same memory location but they
62+ // will have equal values.
63+ {
64+ sycl::buffer<T, 1 > buf_a{ &a, 1 };
65+ sycl::buffer<T, 1 > buf_b{ &b, 1 };
66+ q.submit ([&](sycl::handler &cgh) {
67+ sycl::accessor acc_a{ buf_a, cgh };
68+ sycl::accessor acc_b { buf_b, cgh };
69+ syclexp::work_group_memory<T> temp{ cgh };
70+ syclexp::work_group_memory<T> temp2{ cgh };
71+ cgh.single_task ([=]() {
72+ temp = acc_a[0 ];
73+ acc_a[0 ] = acc_b[0 ];
74+ temp2 = *(temp.get_multi_ptr ()); // temp2 now has the same value as temp but not the same memory location
75+ acc_b[0 ] = temp2;
76+ });});
77+ }
78+ // Three swaps same as one swap
79+ assert (a == old_b && b == old_a && " Incorrect swap!" );
80+ // Same as above but instead of using multi_ptr, use address-of operator.
81+ {
82+ sycl::buffer<T, 1 > buf_a{ &a, 1 };
83+ sycl::buffer<T, 1 > buf_b{ &b, 1 };
84+ q.submit ([&](sycl::handler &cgh) {
85+ sycl::accessor acc_a{ buf_a, cgh };
86+ sycl::accessor acc_b { buf_b, cgh };
87+ syclexp::work_group_memory<T> temp{ cgh };
88+ syclexp::work_group_memory<T> temp2{ cgh };
89+ cgh.single_task ([=]() {
90+ temp = acc_a[0 ];
91+ acc_a[0 ] = acc_b[0 ];
92+ temp2 = *(&temp);
93+ acc_b[0 ] = temp2;
94+ });});
95+ }
96+ // Four swaps same as no swap
97+ assert (a == old_a && b == old_b && " Incorrect swap!" );
3498}
3599
36100template <typename T, size_t N>
37- void swap_bounded_array_1d (T (&a)[N], T (&b)[N]) {
101+ void swap_array_1d (T (&a)[N], T (&b)[N]) {
38102sycl::queue q;
39103 T old_a[N];
40104 std::memcpy (old_a, a, sizeof (a));
@@ -56,25 +120,182 @@ for (int i= 0; i < N; ++i) {
56120 });});
57121 }
58122for (int i = 0 ; i < N; ++i) {
59- assert (a[i] == old_b[i] && b[i] == old_a[i] && " Swap assertion failed" );
123+ assert (a[i] == old_b[i] && b[i] == old_a[i] && " Incorrect swap!" );
124+ }
125+
126+ // Instead of working with the temporary work group memory object, we retrieve its corresponding
127+ // multi-pointer and work with it instead.
128+ {
129+ sycl::buffer<T, 1 > buf_a{ a, N};
130+ sycl::buffer<T, 1 > buf_b{ b, N};
131+ q.submit ([&](sycl::handler &cgh) {
132+ sycl::accessor acc_a{ buf_a, cgh };
133+ sycl::accessor acc_b { buf_b, cgh };
134+ syclexp::work_group_memory<T[N]> temp{ cgh };
135+ cgh.single_task ([=]() {
136+ auto ptr = temp.get_multi_ptr ();
137+ for (int i= 0 ; i < N; ++i) {
138+ ptr[i] = acc_a[i];
139+ acc_a[i] = acc_b[i];
140+ acc_b[i] = ptr[i];
141+ }
142+ });});
143+ }
144+ // Two swaps same as ono swap
145+ for (int i = 0 ; i < N; ++i) {
146+ assert (a[i] == old_a[i] && b[i] == old_b[i] && " Incorrect swap!" );
147+ }
148+
149+ // Same as above but use a pointer returned by the address-of operator instead.
150+ {
151+ sycl::buffer<T, 1 > buf_a{ a, N};
152+ sycl::buffer<T, 1 > buf_b{ b, N};
153+ q.submit ([&](sycl::handler &cgh) {
154+ sycl::accessor acc_a{ buf_a, cgh };
155+ sycl::accessor acc_b { buf_b, cgh };
156+ syclexp::work_group_memory<T[N]> temp{ cgh };
157+ cgh.single_task ([=]() {
158+ auto ptr = &temp;
159+ for (int i= 0 ; i < N; ++i) {
160+ (*ptr)[i] = acc_a[i];
161+ acc_a[i] = acc_b[i];
162+ acc_b[i] = (*ptr)[i];
163+ }
164+ });});
165+ }
166+ // Three swaps same as one swap
167+ for (int i = 0 ; i < N; ++i) {
168+ assert (a[i] == old_b[i] && b[i] == old_a[i] && " Incorrect swap!" );
169+
60170}
61-
62171}
172+
173+
174+ template <typename T, size_t N, size_t M>
175+ void swap_array_2d (T (&a)[N][M], T (&b)[N][M]) {
176+ sycl::queue q;
177+ T old_a[N][M];
178+ for (int i = 0 ; i < N; ++i) {
179+ std::memcpy (old_a[i], a[i], sizeof (a[0 ]));
180+ }
181+ T old_b[N][M];
182+ for (int i = 0 ; i < N; ++i) {
183+
184+ std::memcpy (old_b[i], b[i], sizeof (b[0 ]));
185+ }
186+ {
187+ sycl::buffer<T, 2 > buf_a{ a[0 ], sycl::range{N, M}};
188+ sycl::buffer<T, 2 > buf_b{ b[0 ], sycl::range{N, M}};
189+ q.submit ([&](sycl::handler &cgh) {
190+ sycl::accessor acc_a{ buf_a, cgh };
191+ sycl::accessor acc_b { buf_b, cgh };
192+ syclexp::work_group_memory<T[N][M]> temp{ cgh };
193+ cgh.single_task ([=]() {
194+ for (int i= 0 ; i < N; ++i) {
195+ for (int j = 0 ; j < M; ++j) {
196+ temp[i][j]= acc_a[i][j];
197+ acc_a[i][j] = acc_b[i][j];
198+ acc_b[i][j] = temp[i][j];
199+ }
200+ }
201+ });});
202+ }
203+ for (int i = 0 ; i < N; ++i) {
204+ for (int j = 0 ; j < M; ++j) {
205+ assert (a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && " Incorrect swap!" );
206+ }
207+ }
208+
209+ // Perform the swap but this time use two temporary work group memory objects.
210+ // One will save the value of acc_a and the other will be copy-assigned from it
211+ // and will be used to write the values back to acc_b.
212+ {
213+ sycl::buffer<T, 2 > buf_a{ a[0 ], sycl::range{N, M}};
214+ sycl::buffer<T, 2 > buf_b{ b[0 ], sycl::range{N, M}};
215+ q.submit ([&](sycl::handler &cgh) {
216+ sycl::accessor acc_a{ buf_a, cgh };
217+ sycl::accessor acc_b { buf_b, cgh };
218+ syclexp::work_group_memory<T[N][M]> temp{ cgh };
219+ syclexp::work_group_memory<T[N][M]> temp2{ cgh };
220+ cgh.single_task ([=]() {
221+ for (int i= 0 ; i < N; ++i) {
222+ for (int j = 0 ; j < M; ++j) {
223+ temp[i][j]= acc_a[i][j];
224+ acc_a[i][j] = acc_b[i][j];
225+ }
226+ }
227+ syclexp::work_group_memory<T[N][M]> temp2;
228+ temp2 = temp;
229+ for (int i = 0 ; i < N; ++i) {
230+ for (int j = 0 ; j < M; ++j) {
231+ acc_b[i][j] = temp2[i][j];
232+ }
233+ }
234+ });});
235+ }
236+ for (int i = 0 ; i < N; ++i) {
237+ for (int j = 0 ; j < M; ++j) {
238+ // Two swaps are the same as no swap
239+ assert (a[i][j] == old_a[i][j] && b[i][j] == old_b[i][j] && " Incorrect swap!" );
240+ }
241+ }
242+
243+ // Same as above but construct the second temporary inside the kernel and copy-construct it from the first temporary.
244+ {
245+ sycl::buffer<T, 2 > buf_a{ a[0 ], sycl::range{N, M}};
246+ sycl::buffer<T, 2 > buf_b{ b[0 ], sycl::range{N, M}};
247+ q.submit ([&](sycl::handler &cgh) {
248+ sycl::accessor acc_a{ buf_a, cgh };
249+ sycl::accessor acc_b { buf_b, cgh };
250+ syclexp::work_group_memory<T[N][M]> temp{ cgh };
251+ syclexp::work_group_memory<T[N][M]> temp2{ cgh };
252+ cgh.single_task ([=]() {
253+ for (int i= 0 ; i < N; ++i) {
254+ for (int j = 0 ; j < M; ++j) {
255+ temp[i][j]= acc_a[i][j];
256+ acc_a[i][j] = acc_b[i][j];
257+ }
258+ }
259+ syclexp::work_group_memory<T[N][M]> temp2{ temp };
260+ for (int i = 0 ; i < N; ++i) {
261+ for (int j = 0 ; j < M; ++j) {
262+ acc_b[i][j] = temp2[i][j];
263+ }
264+ }
265+ });});
266+ }
267+ for (int i = 0 ; i < N; ++i) {
268+ for (int j = 0 ; j < M; ++j) {
269+ // Three swaps are the same as one swap
270+ assert (a[i][j] == old_b[i][j] && b[i][j] == old_a[i][j] && " Incorrect swap!" );
271+ }
272+ }
273+
274+ }
275+ constexpr size_t N = 100 ;
276+ constexpr size_t M = 100 ;
63277int main () {
64- int a = 25 ;
65- int b = 42 ;
66- int arr1[5 ] = {0 , 1 , 2 , 3 , 4 };
67- int arr2[5 ] = {5 , 6 , 7 , 8 , 9 };
68- swap_scalar (a, b);
69- swap_bounded_array_1d (arr1, arr2);
278+ int intarr1[N][M];
279+ int intarr2[N][M];
280+ float floatarr1[N][M];
281+ float floatarr2[N][M];
282+ for (int i = 0 ; i < N; ++i) {
283+ for (int j = 0 ; j < M; ++j) {
284+ intarr1[i][j] = i + j;
285+ intarr2[i][j] = i * j;
286+ floatarr1[i][j] = (i + 1 ) / (j + 1 );
287+ floatarr2[i][j] = (j + 1 ) / (i + 1 );
288+ }
289+ }
290+ for (int i = 0 ; i < N; ++i) {
291+ for (int j = 0 ; j < M; ++j) {
292+ swap_scalar (intarr1[i][j], intarr2[i][j]);
293+ swap_scalar (floatarr1[i][j], floatarr2[i][j]);
294+ }
295+ swap_array_1d (intarr1[i], intarr2[i]);
296+ swap_array_1d (floatarr1[i], floatarr2[i]);
297+ }
298+ swap_array_2d (intarr1, intarr2);
299+ swap_array_2d (floatarr1, floatarr2);
70300return 0 ;
71301}
72-
73-
74-
75-
76-
77-
78-
79-
80-
0 commit comments