@@ -18,17 +18,21 @@ class SharedSystemMemoryTests
18
18
protected:
19
19
void SetUp () override {
20
20
device = lzt::zeDevice::get_instance ()->get_device ();
21
- module = lzt::create_module (device, " memory_add.spv" );
22
- }
23
21
24
- void TearDown () override { lzt::destroy_module (module ); }
22
+ bool is_dst_shared_system = std::get<0 >(GetParam ()).first ;
23
+ bool is_src_shared_system = std::get<0 >(GetParam ()).second ;
24
+ if (is_dst_shared_system || is_src_shared_system) {
25
+ SKIP_IF_SHARED_SYSTEM_ALLOC_UNSUPPORTED ();
26
+ }
27
+ }
25
28
26
29
ze_device_handle_t device;
27
- ze_module_handle_t module ;
28
30
};
29
31
32
+ class SharedSystemMemoryLaunchKernelTests : public SharedSystemMemoryTests {};
33
+
30
34
LZT_TEST_P (
31
- SharedSystemMemoryTests ,
35
+ SharedSystemMemoryLaunchKernelTests ,
32
36
GivenSharedSystemMemoryAllocationsAsKernelArgumentsWhenKernelExecutesThenValuesAreCorrect) {
33
37
bool is_dst_shared_system = std::get<0 >(GetParam ()).first ;
34
38
bool is_src_shared_system = std::get<0 >(GetParam ()).second ;
@@ -39,12 +43,9 @@ LZT_TEST_P(
39
43
constexpr size_t group_size = 32 ;
40
44
ASSERT_EQ (buffer_size % (sizeof (int ) * group_size), 0 );
41
45
42
- if (is_dst_shared_system || is_src_shared_system) {
43
- SKIP_IF_SHARED_SYSTEM_ALLOC_UNSUPPORTED ();
44
- }
45
-
46
46
constexpr int source_value = 1234 ;
47
47
constexpr int add_value = 5678 ;
48
+ const size_t num_elements = buffer_size / sizeof (int );
48
49
49
50
void *result = lzt::allocate_shared_memory_with_allocator_selector (
50
51
buffer_size, 1 , 0 , 0 , device, is_dst_shared_system);
@@ -53,10 +54,12 @@ LZT_TEST_P(
53
54
54
55
memset (result, 0 , buffer_size);
55
56
int *source_as_int = reinterpret_cast <int *>(source);
56
- for (size_t i = 0 ; i < buffer_size / sizeof ( int ) ; i++) {
57
+ for (size_t i = 0 ; i < num_elements ; i++) {
57
58
source_as_int[i] = source_value;
58
59
}
59
60
61
+ ze_module_handle_t module = lzt::create_module (device, " memory_add.spv" );
62
+
60
63
const char *funcion_name =
61
64
use_atomic_kernel ? " memory_atomic_add" : " memory_add" ;
62
65
ze_kernel_handle_t function = lzt::create_function (module , funcion_name);
@@ -78,12 +81,13 @@ LZT_TEST_P(
78
81
lzt::execute_and_sync_command_bundle (cmd_bundle, UINT64_MAX);
79
82
80
83
int *result_as_int = reinterpret_cast <int *>(result);
81
- for (size_t i = 0 ; i < buffer_size / sizeof ( int ) ; i++) {
84
+ for (size_t i = 0 ; i < num_elements ; i++) {
82
85
EXPECT_EQ (result_as_int[i], source_value + add_value) << " index = " << i;
83
86
}
84
87
85
88
lzt::destroy_command_bundle (cmd_bundle);
86
89
lzt::destroy_function (function);
90
+ lzt::destroy_module (module );
87
91
88
92
lzt::free_memory_with_allocator_selector (source, is_src_shared_system);
89
93
lzt::free_memory_with_allocator_selector (result, is_dst_shared_system);
@@ -107,9 +111,13 @@ struct SharedSystemMemoryTestsNameSuffix {
107
111
return " _4KB" ;
108
112
case 0x1800u :
109
113
return " _6KB" ;
110
- case 0x100000u :
114
+ case 0x1'0000u :
115
+ return " _64KB" ;
116
+ case 0x1'0800u :
117
+ return " _66KB" ;
118
+ case 0x10'0000u :
111
119
return " _1MB" ;
112
- case 0x100800u :
120
+ case 0x10'0800u :
113
121
return " _1MB2KB" ;
114
122
case 0x4000'0000u :
115
123
return " _1GB" ;
@@ -131,11 +139,118 @@ struct SharedSystemMemoryTestsNameSuffix {
131
139
};
132
140
133
141
INSTANTIATE_TEST_SUITE_P (
134
- ParamSVMAllocationTests, SharedSystemMemoryTests,
142
+ ParamSVMAllocationLaunchKernelTests, SharedSystemMemoryLaunchKernelTests,
143
+ testing::Combine (testing::Values(std::make_pair(true , false ),
144
+ std::make_pair(false , true ),
145
+ std::make_pair(true , true )),
146
+ testing::Bool(), testing::Bool(),
147
+ testing::Values(0x80u , 0x1000u , 0x1800u , 0x10'0000u ,
148
+ 0x10'0800u , 0x4000'0000u , 0x4000'0800u )),
149
+ SharedSystemMemoryTestsNameSuffix());
150
+
151
+ class SharedSystemMemoryLaunchCooperativeKernelTests
152
+ : public SharedSystemMemoryTests {};
153
+
154
+ LZT_TEST_P (
155
+ SharedSystemMemoryLaunchCooperativeKernelTests,
156
+ GivenSharedSystemMemoryAllocationsAsKernelArgumentsWhenCooperativeKernelExecutesThenValueIsCorrect) {
157
+ int ordinal = -1 ;
158
+ auto command_queue_group_properties =
159
+ lzt::get_command_queue_group_properties (device);
160
+ for (int i = 0 ; i < command_queue_group_properties.size (); i++) {
161
+ if (command_queue_group_properties[i].flags &
162
+ ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS) {
163
+ ordinal = i;
164
+ break ;
165
+ }
166
+ }
167
+ if (ordinal < 0 ) {
168
+ LOG_WARNING << " No command queues that support cooperative kernels" ;
169
+ GTEST_SKIP ();
170
+ }
171
+
172
+ const bool is_dst_shared_system = std::get<0 >(GetParam ()).first ;
173
+ const bool is_src_shared_system = std::get<0 >(GetParam ()).second ;
174
+ const bool use_atomic_kernel = std::get<1 >(GetParam ());
175
+ const bool use_immediate_cmdlist = std::get<2 >(GetParam ());
176
+ const size_t buffer_size = std::get<3 >(GetParam ());
177
+ const size_t num_elements = buffer_size / sizeof (int );
178
+ LOG_INFO << " Num elements: " << num_elements;
179
+
180
+ auto compute_properties = lzt::get_compute_properties (device);
181
+
182
+ void *input = lzt::allocate_shared_memory_with_allocator_selector (
183
+ buffer_size, 1 , 0 , 0 , device, is_src_shared_system);
184
+
185
+ int *input_as_int = reinterpret_cast <int *>(input);
186
+ for (size_t i = 0 ; i < num_elements; i++) {
187
+ input_as_int[i] = 1 ;
188
+ }
189
+
190
+ ze_module_handle_t module =
191
+ lzt::create_module (device, " cooperative_reduction.spv" );
192
+ const char *function_name = use_atomic_kernel ? " cooperative_reduction_atomic"
193
+ : " cooperative_reduction" ;
194
+ ze_kernel_handle_t function = lzt::create_function (module , function_name);
195
+
196
+ uint32_t max_coop_group_count = 1 ;
197
+ lzt::suggest_max_cooperative_group_count (function, max_coop_group_count);
198
+ ASSERT_GT (max_coop_group_count, 0 );
199
+
200
+ uint32_t suggested_group_count = [](uint32_t n) {
201
+ n |= n >> 1 ;
202
+ n |= n >> 2 ;
203
+ n |= n >> 4 ;
204
+ n |= n >> 8 ;
205
+ n |= n >> 16 ;
206
+ return n - (n >> 1 );
207
+ }(max_coop_group_count);
208
+
209
+ uint32_t group_count = (num_elements < suggested_group_count)
210
+ ? num_elements
211
+ : suggested_group_count;
212
+ LOG_INFO << " Group count: " << group_count;
213
+
214
+ void *output = lzt::allocate_shared_memory_with_allocator_selector (
215
+ group_count * sizeof (int ), 1 , 0 , 0 , device, is_dst_shared_system);
216
+
217
+ uint32_t group_size = num_elements / group_count;
218
+ LOG_INFO << " Group size: " << group_size;
219
+ ASSERT_LE (group_size, compute_properties.maxGroupSizeX );
220
+
221
+ lzt::set_group_size (function, group_size, 1 , 1 );
222
+ lzt::set_argument_value (function, 0 , sizeof (input), &input);
223
+ lzt::set_argument_value (function, 1 , sizeof (output), &output);
224
+ lzt::set_argument_value (function, 2 , group_size * sizeof (int ), nullptr );
225
+
226
+ lzt::zeCommandBundle cmd_bundle = lzt::create_command_bundle (
227
+ lzt::get_default_context (), device, 0 , ordinal, use_immediate_cmdlist);
228
+
229
+ ze_group_count_t thread_group_dimensions = {group_count, 1 , 1 };
230
+ lzt::append_launch_cooperative_function (
231
+ cmd_bundle.list , function, &thread_group_dimensions, nullptr , 0 , nullptr );
232
+
233
+ lzt::close_command_list (cmd_bundle.list );
234
+ lzt::execute_and_sync_command_bundle (cmd_bundle, UINT64_MAX);
235
+
236
+ int *result = reinterpret_cast <int *>(output);
237
+ EXPECT_EQ (result[0 ], num_elements);
238
+
239
+ lzt::destroy_command_bundle (cmd_bundle);
240
+ lzt::destroy_function (function);
241
+ lzt::destroy_module (module );
242
+
243
+ lzt::free_memory_with_allocator_selector (output, is_dst_shared_system);
244
+ lzt::free_memory_with_allocator_selector (input, is_src_shared_system);
245
+ }
246
+
247
+ INSTANTIATE_TEST_SUITE_P (
248
+ ParamSVMAllocationLaunchCooperativeKernelTests,
249
+ SharedSystemMemoryLaunchCooperativeKernelTests,
135
250
testing::Combine (testing::Values(std::make_pair(true , false ),
136
251
std::make_pair(false , true ),
137
252
std::make_pair(true , true )),
138
253
testing::Bool(), testing::Bool(),
139
- testing::Values(0x80u , 0x1000u , 0x1800u , 0x100000u ,
140
- 0x100800u , 0x4000'0000u , 0x4000 '0800u )),
254
+ testing::Values(0x80u , 0x1000u , 0x1800u , 0x1'0000u ,
255
+ 0x1 '0800u )),
141
256
SharedSystemMemoryTestsNameSuffix());
0 commit comments