44// RUN: %{run} %t.out
55
66// This test verifies that we can compile, run and get correct results when
7- // using a free function kernel that uses the work group scratch memory feature.
7+ // using a free function kernel that allocates shared local memory in a kernel
8+ // either by way of th work group scratch memory extension or the work group
9+ // static memory extension.
810
911#include < sycl/ext/oneapi/work_group_static.hpp>
1012
1113#include " helpers.hpp"
1214#include < cassert>
1315#include < sycl/ext/oneapi/experimental/enqueue_functions.hpp>
1416#include < sycl/ext/oneapi/free_function_queries.hpp>
17+ #include < sycl/group_barrier.hpp>
1518#include < sycl/usm.hpp>
1619
1720namespace syclext = sycl::ext::oneapi;
@@ -22,13 +25,24 @@ constexpr int SIZE = 16;
2225SYCL_EXT_ONEAPI_FUNCTION_PROPERTY ((syclexp::nd_range_kernel<1 >))
2326void double_kernel(float *src, float *dst) {
2427 size_t lid = syclext::this_work_item::get_nd_item<1 >().get_local_linear_id ();
25-
2628 float *local_mem = (float *)syclexp::get_work_group_scratch_memory ();
29+ local_mem[lid] = 2 * src[lid];
30+ dst[lid] = local_mem[lid];
31+ }
2732
28- for (int i = 0 ; i < SIZE; i++) {
29- local_mem[lid] = 2 * src[i];
30- dst[i] = local_mem[i];
33+ SYCL_EXT_ONEAPI_FUNCTION_PROPERTY ((syclexp::nd_range_kernel<1 >))
34+ void square_kernel(float *src, float *dst) {
35+ sycl::nd_item<1 > item = syclext::this_work_item::get_nd_item<1 >();
36+ size_t lid = item.get_local_linear_id ();
37+ syclexp::work_group_static<float [SIZE]> local_mem;
38+ local_mem[lid] = src[lid] * src[lid];
39+ sycl::group_barrier (item.get_group ());
40+ if (item.get_group ().leader ()) { // Check that memory is indeed shared between
41+ // the work group
42+ for (int i = 0 ; i < SIZE; ++i)
43+ assert (local_mem[i] == src[i] * src[i]);
3144 }
45+ dst[lid] = local_mem[lid];
3246}
3347
3448int main () {
@@ -43,20 +57,29 @@ int main() {
4357 auto kbndl =
4458 syclexp::get_kernel_bundle<double_kernel, sycl::bundle_state::executable>(
4559 q.get_context ());
46- sycl::kernel k = kbndl.template ext_oneapi_get_kernel <double_kernel>();
47-
48- syclexp::launch_config cfg{
60+ sycl::kernel DoubleKernel =
61+ kbndl.template ext_oneapi_get_kernel <double_kernel>();
62+ sycl::kernel SquareKernel =
63+ kbndl.template ext_oneapi_get_kernel <square_kernel>();
64+ syclexp::launch_config DoubleKernelcfg{
4965 ::sycl::nd_range<1 >(::sycl::range<1 >(SIZE), ::sycl::range<1 >(SIZE)),
5066 syclexp::properties{
5167 syclexp::work_group_scratch_size (SIZE * sizeof (float ))}};
68+ syclexp::launch_config SquareKernelcfg{
69+ ::sycl::nd_range<1 >(::sycl::range<1 >(SIZE), ::sycl::range<1 >(SIZE))};
5270
53- syclexp::nd_launch (q, cfg, k , src, dst);
71+ syclexp::nd_launch (q, DoubleKernelcfg, DoubleKernel , src, dst);
5472 q.wait ();
55-
5673 for (int i = 0 ; i < SIZE; i++) {
5774 assert (dst[i] == 2 * src[i]);
5875 }
5976
77+ syclexp::nd_launch (q, SquareKernelcfg, SquareKernel, src, dst);
78+ q.wait ();
79+ for (int i = 0 ; i < SIZE; i++) {
80+ assert (dst[i] == src[i] * src[i]);
81+ }
82+
6083 sycl::free (src, q);
6184 sycl::free (dst, q);
6285 return 0 ;
0 commit comments