Skip to content

Commit 8f7a07b

Browse files
committed
Add initial tests for work_group_memory extension
1 parent d6c78b9 commit 8f7a07b

File tree

3 files changed

+270
-1
lines changed

3 files changed

+270
-1
lines changed

sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,25 @@ class work_group_memory_impl {
3535
inline size_t getWorkGroupMemoryOwnSize(detail::work_group_memory_impl *wgm) {
3636
return wgm->wgm_size;
3737
}
38+
39+
// The following 3 functions help us get the address of the first element of a multi-dimensional
40+
// array, be it bounded or unbounded. A scalar is also included. In that case, it just returns
41+
// the address of the scalar.
42+
template <typename DataT>
43+
auto getData(DataT& scalar) {
44+
return &scalar;
45+
}
46+
47+
template <typename DataT, size_t N>
48+
auto getData(DataT (&bounded_arr)[N]) {
49+
return getData(bounded_arr[0]);
50+
}
51+
52+
template<typename DataT>
53+
auto getData(DataT (&unbounded_arr)[]) {
54+
return getData(unbounded_arr[0]);
55+
}
56+
3857
} // namespace detail
3958

4059
namespace ext::oneapi::experimental {
@@ -67,7 +86,7 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
6786
multi_ptr<value_type, access::address_space::local_space, IsDecorated>
6887
get_multi_ptr() const {
6988
return sycl::address_space_cast<access::address_space::local_space,
70-
IsDecorated, value_type>(ptr);
89+
IsDecorated, value_type>(sycl::detail::getData(*ptr));
7190
}
7291
DataT *operator&() const { return ptr; }
7392
operator DataT &() const { return *(this->operator&()); }
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// RUN: %{build} -o %{t.out}
2+
// RUN: %{run} %{t.out}
3+
4+
#include <sycl/detail/core.hpp>
5+
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
6+
#include <cassert>
7+
#include <cstring>
8+
9+
namespace syclexp = sycl::ext::oneapi::experimental;
10+
11+
// This test performs a swap of two scalars/arrays inside a kernel using a work_group_memory object as a temporary buffer.
12+
// The test is done for scalars types, bounded and unbounded arrays. After the kernel finishes, it is verified on the host side
13+
// that the swap worked.
14+
15+
template< typename T>
16+
void swap_scalar(T& a, T& b) {
17+
sycl::queue q;
18+
const T old_a = a;
19+
const T old_b = b;
20+
{
21+
sycl::buffer<T, 1> buf_a{ &a, 1};
22+
sycl::buffer<T, 1> buf_b{ &b, 1};
23+
q.submit([&](sycl::handler &cgh) {
24+
sycl::accessor acc_a{ buf_a, cgh };
25+
sycl::accessor acc_b { buf_b, cgh };
26+
syclexp::work_group_memory<T> temp{ cgh };
27+
cgh.single_task([=]() {
28+
temp = acc_a[0];
29+
acc_a[0] = acc_b[0];
30+
acc_b[0] = temp;
31+
});});
32+
}
33+
assert(a == old_b && b == old_a && "Swap assertion failed");
34+
}
35+
36+
template<typename T, size_t N>
37+
void swap_bounded_array_1d(T (&a)[N], T (&b)[N]) {
38+
sycl::queue q;
39+
T old_a[N];
40+
std::memcpy(old_a, a, sizeof(a));
41+
T old_b[N];
42+
std::memcpy(old_b, b, sizeof(b));
43+
{
44+
sycl::buffer<T, 1> buf_a{ a, N};
45+
sycl::buffer<T, 1> buf_b{ b, N};
46+
q.submit([&](sycl::handler &cgh) {
47+
sycl::accessor acc_a{ buf_a, cgh };
48+
sycl::accessor acc_b { buf_b, cgh };
49+
syclexp::work_group_memory<T[N]> temp{ cgh };
50+
cgh.single_task([=]() {
51+
for (int i= 0; i < N; ++i) {
52+
temp[i] = acc_a[i];
53+
acc_a[i] = acc_b[i];
54+
acc_b[i] = temp[i];
55+
}
56+
});});
57+
}
58+
for (int i = 0; i < N; ++i) {
59+
assert(a[i] == old_b[i] && b[i] == old_a[i] && "Swap assertion failed");
60+
}
61+
62+
}
63+
int main() {
64+
int a = 25;
65+
int b = 42;
66+
int arr1[5] = {0, 1, 2, 3, 4};
67+
int arr2[5] = {5, 6, 7, 8, 9};
68+
swap_scalar(a, b);
69+
swap_bounded_array_1d(arr1, arr2);
70+
return 0;
71+
}
72+
73+
74+
75+
76+
77+
78+
79+
80+
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// RUN: %{build} -o %{t.out}
2+
// RUN: %{run} %{t.out}
3+
4+
#include <type_traits>
5+
#include <cstdlib>
6+
#include <iostream>
7+
#include <sycl/detail/core.hpp>
8+
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
9+
#include <sycl/usm.hpp>
10+
11+
// Sanity test that checks to see if idiomatic code involving work_group_memory objects compiles and runs with no errors.
12+
13+
namespace syclex = sycl::ext::oneapi::experimental;
14+
sycl::queue global_q;
15+
16+
constexpr size_t SIZE = 4096;
17+
constexpr size_t WGSIZE = 256;
18+
19+
struct point {
20+
int x;
21+
int y;
22+
};
23+
24+
void simple_inc(const syclex::work_group_memory<int>& mem) {
25+
mem++;
26+
}
27+
28+
void fancy_inc(syclex::work_group_memory<int> mem) {
29+
syclex::work_group_memory<int> t = mem;
30+
t = mem;
31+
t++;
32+
}
33+
34+
void test_breadth() {
35+
sycl::queue q;
36+
global_q = q;
37+
38+
int *res = sycl::malloc_host<int>(16, q);
39+
40+
q.submit([&](sycl::handler &cgh) {
41+
syclex::work_group_memory<int> mem1{cgh};
42+
syclex::work_group_memory<int[10]> mem2{cgh};
43+
syclex::work_group_memory<int[10]> mem3{cgh};
44+
syclex::work_group_memory<int[]> mem4{5, cgh};
45+
syclex::work_group_memory<int[][10]> mem5{2, cgh};
46+
syclex::work_group_memory<int[][10]> mem6{2, cgh};
47+
syclex::work_group_memory<point> mem7{cgh};
48+
syclex::work_group_memory<point[][10]> mem8{2, cgh};
49+
50+
cgh.single_task([=] {
51+
// Operations on scalar
52+
++mem1;
53+
mem1++;
54+
mem1 += 1;
55+
mem1 = mem1 + 1;
56+
int *p1 = &mem1; (*p1)++;
57+
simple_inc(mem1);
58+
fancy_inc(mem1);
59+
res[0] = *(mem1.get_multi_ptr());
60+
res[1] = mem1;
61+
62+
// Operations on bounded array
63+
mem2[4] = mem2[4] + 1;
64+
int (*p2)[10] = &mem2; (*p2)[4]++;
65+
res[2] = mem2.get_multi_ptr()[4];
66+
res[3] = mem2[4];
67+
68+
mem3[4] = mem3[4] + 1;
69+
int (*p3)[10] = &mem3; (*p3)[4]++;
70+
res[4] = mem3.get_multi_ptr()[4];
71+
res[5] = mem3[4];
72+
73+
// Operations on unbounded array
74+
mem4[4] = mem4[4] + 1;
75+
int (*p4)[] = &mem4; (*p4)[4]++;
76+
res[6] = mem4.get_multi_ptr()[4];
77+
res[7] = mem4[4];
78+
79+
// Operations on unbounded multi-dimensional array
80+
mem5[1][5] = mem5[1][5] + 1;
81+
mem5[1][7] = mem5[1][7] + 1;
82+
res[8] = mem5.get_multi_ptr()[10 + 5];
83+
res[9] = mem5[1][7];
84+
85+
mem6[1][5] = mem6[1][5] + 1;
86+
mem6[1][7] = mem6[1][7] + 1;
87+
res[10] = mem6.get_multi_ptr()[10 + 5];
88+
res[11] = mem6[1][7];
89+
90+
// Operations on scalar struct
91+
(&mem7)->x++;
92+
(&mem7)->y += 1;
93+
point pnt = mem7;
94+
pnt.x++;
95+
pnt.y++;
96+
mem7 = pnt;
97+
res[12] = (&mem7)->x;
98+
res[13] = (&mem7)->y;
99+
100+
// Operations on unbounded multi-dimensional array of struct
101+
mem8[1][5].x++;
102+
mem8[1][5].y += 1;
103+
res[14] = mem8.get_multi_ptr()[10 + 5].x;
104+
res[15] = mem8[1][5].y;
105+
});
106+
}).wait();
107+
}
108+
109+
void test_basic() {
110+
sycl::queue q;
111+
112+
q.submit([&](sycl::handler &cgh) {
113+
// Allocate one element for each work-item in the work-group.
114+
syclex::work_group_memory<int[WGSIZE]> mem{cgh};
115+
116+
sycl::nd_range ndr{{SIZE}, {WGSIZE}};
117+
cgh.parallel_for(ndr, [=](sycl::nd_item<> it) {
118+
size_t id = it.get_local_linear_id();
119+
120+
// Each work-item has its own dedicated element of the array.
121+
mem[id] = 0;
122+
});
123+
}).wait();
124+
}
125+
126+
void test_operations() {
127+
sycl::queue q;
128+
129+
q.submit([&](sycl::handler &cgh) {
130+
syclex::work_group_memory<int> mem1{cgh}; // scalar
131+
syclex::work_group_memory<int[10]> mem2{cgh}; // bounded array
132+
syclex::work_group_memory<int[]> mem3{5, cgh}; // unbounded array
133+
syclex::work_group_memory<int[][10]> mem4{2, cgh}; // multi-dimensional array
134+
syclex::work_group_memory<point[10]> mem5{cgh}; // array of struct
135+
136+
sycl::nd_range ndr{{SIZE}, {WGSIZE}};
137+
cgh.parallel_for(ndr, [=](sycl::nd_item<> it) {
138+
if (it.get_group().leader()) {
139+
// A "work_group_memory" templated on a scalar type acts much like the
140+
// enclosed scalar type.
141+
++mem1;
142+
mem1++;
143+
mem1 += 1;
144+
mem1 = mem1 + 1;
145+
int *p1 = &mem1;
146+
147+
// A "work_group_memory" templated on an array type (either bounded or
148+
// unbounded) acts like an array.
149+
++mem2[4];
150+
mem2[4]++;
151+
mem2[4] = mem2[4] + 1;
152+
int *p2 = &mem2[4];
153+
154+
// A multi-dimensional array works as expected.
155+
mem4[1][5] = mem4[1][5] + 1;
156+
mem4[1][7] = mem4[1][7] + 1;
157+
158+
// An array of structs works as expected too.
159+
mem5[1].x++;
160+
mem5[1].y = mem5[1].y + 1;
161+
}
162+
});
163+
}).wait();
164+
}
165+
166+
int main() {
167+
test_breadth();
168+
test_basic();
169+
test_operations();
170+
}

0 commit comments

Comments
 (0)