|
| 1 | +// RUN: %{build} -o %{t.out} |
| 2 | +// RUN: %{run} %{t.out} |
| 3 | + |
| 4 | +#include <type_traits> |
| 5 | +#include <cstdlib> |
| 6 | +#include <iostream> |
| 7 | +#include <sycl/detail/core.hpp> |
| 8 | +#include <sycl/ext/oneapi/experimental/work_group_memory.hpp> |
| 9 | +#include <sycl/usm.hpp> |
| 10 | + |
| 11 | +// Sanity test that checks to see if idiomatic code involving work_group_memory objects compiles and runs with no errors. |
| 12 | + |
| 13 | +namespace syclex = sycl::ext::oneapi::experimental; |
| 14 | +sycl::queue global_q; |
| 15 | + |
| 16 | +constexpr size_t SIZE = 4096; |
| 17 | +constexpr size_t WGSIZE = 256; |
| 18 | + |
// Plain aggregate of two ints, used by the tests in this file as a
// user-defined element type for work_group_memory.
struct point {
  int x;
  int y;
};
| 23 | + |
| 24 | +void simple_inc(const syclex::work_group_memory<int>& mem) { |
| 25 | + mem++; |
| 26 | +} |
| 27 | + |
| 28 | +void fancy_inc(syclex::work_group_memory<int> mem) { |
| 29 | + syclex::work_group_memory<int> t = mem; |
| 30 | + t = mem; |
| 31 | + t++; |
| 32 | +} |
| 33 | + |
| 34 | +void test_breadth() { |
| 35 | + sycl::queue q; |
| 36 | + global_q = q; |
| 37 | + |
| 38 | + int *res = sycl::malloc_host<int>(16, q); |
| 39 | + |
| 40 | + q.submit([&](sycl::handler &cgh) { |
| 41 | + syclex::work_group_memory<int> mem1{cgh}; |
| 42 | + syclex::work_group_memory<int[10]> mem2{cgh}; |
| 43 | + syclex::work_group_memory<int[10]> mem3{cgh}; |
| 44 | + syclex::work_group_memory<int[]> mem4{5, cgh}; |
| 45 | + syclex::work_group_memory<int[][10]> mem5{2, cgh}; |
| 46 | + syclex::work_group_memory<int[][10]> mem6{2, cgh}; |
| 47 | + syclex::work_group_memory<point> mem7{cgh}; |
| 48 | + syclex::work_group_memory<point[][10]> mem8{2, cgh}; |
| 49 | + |
| 50 | + cgh.single_task([=] { |
| 51 | + // Operations on scalar |
| 52 | + ++mem1; |
| 53 | + mem1++; |
| 54 | + mem1 += 1; |
| 55 | + mem1 = mem1 + 1; |
| 56 | + int *p1 = &mem1; (*p1)++; |
| 57 | + simple_inc(mem1); |
| 58 | + fancy_inc(mem1); |
| 59 | + res[0] = *(mem1.get_multi_ptr()); |
| 60 | + res[1] = mem1; |
| 61 | + |
| 62 | + // Operations on bounded array |
| 63 | + mem2[4] = mem2[4] + 1; |
| 64 | + int (*p2)[10] = &mem2; (*p2)[4]++; |
| 65 | + res[2] = mem2.get_multi_ptr()[4]; |
| 66 | + res[3] = mem2[4]; |
| 67 | + |
| 68 | + mem3[4] = mem3[4] + 1; |
| 69 | + int (*p3)[10] = &mem3; (*p3)[4]++; |
| 70 | + res[4] = mem3.get_multi_ptr()[4]; |
| 71 | + res[5] = mem3[4]; |
| 72 | + |
| 73 | + // Operations on unbounded array |
| 74 | + mem4[4] = mem4[4] + 1; |
| 75 | + int (*p4)[] = &mem4; (*p4)[4]++; |
| 76 | + res[6] = mem4.get_multi_ptr()[4]; |
| 77 | + res[7] = mem4[4]; |
| 78 | + |
| 79 | + // Operations on unbounded multi-dimensional array |
| 80 | + mem5[1][5] = mem5[1][5] + 1; |
| 81 | + mem5[1][7] = mem5[1][7] + 1; |
| 82 | + res[8] = mem5.get_multi_ptr()[10 + 5]; |
| 83 | + res[9] = mem5[1][7]; |
| 84 | + |
| 85 | + mem6[1][5] = mem6[1][5] + 1; |
| 86 | + mem6[1][7] = mem6[1][7] + 1; |
| 87 | + res[10] = mem6.get_multi_ptr()[10 + 5]; |
| 88 | + res[11] = mem6[1][7]; |
| 89 | + |
| 90 | + // Operations on scalar struct |
| 91 | + (&mem7)->x++; |
| 92 | + (&mem7)->y += 1; |
| 93 | + point pnt = mem7; |
| 94 | + pnt.x++; |
| 95 | + pnt.y++; |
| 96 | + mem7 = pnt; |
| 97 | + res[12] = (&mem7)->x; |
| 98 | + res[13] = (&mem7)->y; |
| 99 | + |
| 100 | + // Operations on unbounded multi-dimensional array of struct |
| 101 | + mem8[1][5].x++; |
| 102 | + mem8[1][5].y += 1; |
| 103 | + res[14] = mem8.get_multi_ptr()[10 + 5].x; |
| 104 | + res[15] = mem8[1][5].y; |
| 105 | + }); |
| 106 | + }).wait(); |
| 107 | +} |
| 108 | + |
| 109 | +void test_basic() { |
| 110 | + sycl::queue q; |
| 111 | + |
| 112 | + q.submit([&](sycl::handler &cgh) { |
| 113 | + // Allocate one element for each work-item in the work-group. |
| 114 | + syclex::work_group_memory<int[WGSIZE]> mem{cgh}; |
| 115 | + |
| 116 | + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; |
| 117 | + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { |
| 118 | + size_t id = it.get_local_linear_id(); |
| 119 | + |
| 120 | + // Each work-item has its own dedicated element of the array. |
| 121 | + mem[id] = 0; |
| 122 | + }); |
| 123 | + }).wait(); |
| 124 | +} |
| 125 | + |
| 126 | +void test_operations() { |
| 127 | + sycl::queue q; |
| 128 | + |
| 129 | + q.submit([&](sycl::handler &cgh) { |
| 130 | + syclex::work_group_memory<int> mem1{cgh}; // scalar |
| 131 | + syclex::work_group_memory<int[10]> mem2{cgh}; // bounded array |
| 132 | + syclex::work_group_memory<int[]> mem3{5, cgh}; // unbounded array |
| 133 | + syclex::work_group_memory<int[][10]> mem4{2, cgh}; // multi-dimensional array |
| 134 | + syclex::work_group_memory<point[10]> mem5{cgh}; // array of struct |
| 135 | + |
| 136 | + sycl::nd_range ndr{{SIZE}, {WGSIZE}}; |
| 137 | + cgh.parallel_for(ndr, [=](sycl::nd_item<> it) { |
| 138 | + if (it.get_group().leader()) { |
| 139 | + // A "work_group_memory" templated on a scalar type acts much like the |
| 140 | + // enclosed scalar type. |
| 141 | + ++mem1; |
| 142 | + mem1++; |
| 143 | + mem1 += 1; |
| 144 | + mem1 = mem1 + 1; |
| 145 | + int *p1 = &mem1; |
| 146 | + |
| 147 | + // A "work_group_memory" templated on an array type (either bounded or |
| 148 | + // unbounded) acts like an array. |
| 149 | + ++mem2[4]; |
| 150 | + mem2[4]++; |
| 151 | + mem2[4] = mem2[4] + 1; |
| 152 | + int *p2 = &mem2[4]; |
| 153 | + |
| 154 | + // A multi-dimensional array works as expected. |
| 155 | + mem4[1][5] = mem4[1][5] + 1; |
| 156 | + mem4[1][7] = mem4[1][7] + 1; |
| 157 | + |
| 158 | + // An array of structs works as expected too. |
| 159 | + mem5[1].x++; |
| 160 | + mem5[1].y = mem5[1].y + 1; |
| 161 | + } |
| 162 | + }); |
| 163 | + }).wait(); |
| 164 | +} |
| 165 | + |
| 166 | +int main() { |
| 167 | + test_breadth(); |
| 168 | + test_basic(); |
| 169 | + test_operations(); |
| 170 | +} |
0 commit comments