Skip to content

Commit ae3cdb1

Browse files
Add test cases for zeCommandListAppendLaunchCooperativeKernel with SVM (#285)
* Add test cases for zeCommandListAppendLaunchCooperativeKernel with SVM Signed-off-by: Misiak, Konstanty <[email protected]>
1 parent 9273833 commit ae3cdb1

File tree

9 files changed

+237
-18
lines changed

9 files changed

+237
-18
lines changed

conformance_tests/core/test_memory/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2019 Intel Corporation
1+
# Copyright (C) 2019-2025 Intel Corporation
22
# SPDX-License-Identifier: MIT
33

44
if(UNIX)
@@ -24,6 +24,7 @@ add_lzt_test(
2424
level_zero_tests::random
2525
${OS_SPECIFIC_LIBS}
2626
KERNELS
27+
cooperative_reduction
2728
memory_add
2829
unified_mem_test
2930
write_memory_pattern
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
*
3+
* Copyright (C) 2025 Intel Corporation
4+
*
5+
* SPDX-License-Identifier: MIT
6+
*
7+
*/
8+
9+
__kernel void cooperative_reduction(__global const int* input, __global int* output,
10+
__local int* local_sums) {
11+
int gid = get_global_id(0);
12+
int group_id = get_group_id(0);
13+
int lid = get_local_id(0);
14+
int local_size = get_local_size(0);
15+
16+
local_sums[lid] = input[gid];
17+
barrier(CLK_LOCAL_MEM_FENCE);
18+
19+
if (lid == 0) {
20+
for (uint i = 1; i < local_size; i++) {
21+
local_sums[0] += local_sums[i];
22+
}
23+
output[group_id] = local_sums[0];
24+
}
25+
26+
// Device-wide barrier: synchronize all workgroups
27+
global_barrier();
28+
29+
if (group_id == 0 && lid == 0) {
30+
for (uint i = 1; i < get_num_groups(0); i++) {
31+
output[0] += output[i];
32+
}
33+
}
34+
}
35+
36+
__kernel void cooperative_reduction_atomic(__global const int* input, __global int* output,
37+
__local int* local_sums) {
38+
int gid = get_global_id(0);
39+
int group_id = get_group_id(0);
40+
int lid = get_local_id(0);
41+
int local_size = get_local_size(0);
42+
43+
local_sums[lid] = input[gid];
44+
barrier(CLK_LOCAL_MEM_FENCE);
45+
46+
if (lid == 0) {
47+
for (uint i = 1; i < local_size; i++) {
48+
atomic_add(&local_sums[0], local_sums[i]);
49+
}
50+
output[group_id] = local_sums[0];
51+
}
52+
53+
// Device-wide barrier: synchronize all workgroups
54+
global_barrier();
55+
56+
if (group_id == 0 && lid == 0) {
57+
for (uint i = 1; i < get_num_groups(0); i++) {
58+
atomic_add(&output[0], output[i]);
59+
}
60+
}
61+
}
6.41 KB
Binary file not shown.

conformance_tests/core/test_memory/src/test_svm.cpp

Lines changed: 131 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,21 @@ class SharedSystemMemoryTests
1818
protected:
1919
void SetUp() override {
2020
device = lzt::zeDevice::get_instance()->get_device();
21-
module = lzt::create_module(device, "memory_add.spv");
22-
}
2321

24-
void TearDown() override { lzt::destroy_module(module); }
22+
bool is_dst_shared_system = std::get<0>(GetParam()).first;
23+
bool is_src_shared_system = std::get<0>(GetParam()).second;
24+
if (is_dst_shared_system || is_src_shared_system) {
25+
SKIP_IF_SHARED_SYSTEM_ALLOC_UNSUPPORTED();
26+
}
27+
}
2528

2629
ze_device_handle_t device;
27-
ze_module_handle_t module;
2830
};
2931

32+
class SharedSystemMemoryLaunchKernelTests : public SharedSystemMemoryTests {};
33+
3034
LZT_TEST_P(
31-
SharedSystemMemoryTests,
35+
SharedSystemMemoryLaunchKernelTests,
3236
GivenSharedSystemMemoryAllocationsAsKernelArgumentsWhenKernelExecutesThenValuesAreCorrect) {
3337
bool is_dst_shared_system = std::get<0>(GetParam()).first;
3438
bool is_src_shared_system = std::get<0>(GetParam()).second;
@@ -39,12 +43,9 @@ LZT_TEST_P(
3943
constexpr size_t group_size = 32;
4044
ASSERT_EQ(buffer_size % (sizeof(int) * group_size), 0);
4145

42-
if (is_dst_shared_system || is_src_shared_system) {
43-
SKIP_IF_SHARED_SYSTEM_ALLOC_UNSUPPORTED();
44-
}
45-
4646
constexpr int source_value = 1234;
4747
constexpr int add_value = 5678;
48+
const size_t num_elements = buffer_size / sizeof(int);
4849

4950
void *result = lzt::allocate_shared_memory_with_allocator_selector(
5051
buffer_size, 1, 0, 0, device, is_dst_shared_system);
@@ -53,10 +54,12 @@ LZT_TEST_P(
5354

5455
memset(result, 0, buffer_size);
5556
int *source_as_int = reinterpret_cast<int *>(source);
56-
for (size_t i = 0; i < buffer_size / sizeof(int); i++) {
57+
for (size_t i = 0; i < num_elements; i++) {
5758
source_as_int[i] = source_value;
5859
}
5960

61+
ze_module_handle_t module = lzt::create_module(device, "memory_add.spv");
62+
6063
const char *funcion_name =
6164
use_atomic_kernel ? "memory_atomic_add" : "memory_add";
6265
ze_kernel_handle_t function = lzt::create_function(module, funcion_name);
@@ -78,12 +81,13 @@ LZT_TEST_P(
7881
lzt::execute_and_sync_command_bundle(cmd_bundle, UINT64_MAX);
7982

8083
int *result_as_int = reinterpret_cast<int *>(result);
81-
for (size_t i = 0; i < buffer_size / sizeof(int); i++) {
84+
for (size_t i = 0; i < num_elements; i++) {
8285
EXPECT_EQ(result_as_int[i], source_value + add_value) << "index = " << i;
8386
}
8487

8588
lzt::destroy_command_bundle(cmd_bundle);
8689
lzt::destroy_function(function);
90+
lzt::destroy_module(module);
8791

8892
lzt::free_memory_with_allocator_selector(source, is_src_shared_system);
8993
lzt::free_memory_with_allocator_selector(result, is_dst_shared_system);
@@ -107,9 +111,13 @@ struct SharedSystemMemoryTestsNameSuffix {
107111
return "_4KB";
108112
case 0x1800u:
109113
return "_6KB";
110-
case 0x100000u:
114+
case 0x1'0000u:
115+
return "_64KB";
116+
case 0x1'0800u:
117+
return "_66KB";
118+
case 0x10'0000u:
111119
return "_1MB";
112-
case 0x100800u:
120+
case 0x10'0800u:
113121
return "_1MB2KB";
114122
case 0x4000'0000u:
115123
return "_1GB";
@@ -131,11 +139,118 @@ struct SharedSystemMemoryTestsNameSuffix {
131139
};
132140

133141
INSTANTIATE_TEST_SUITE_P(
134-
ParamSVMAllocationTests, SharedSystemMemoryTests,
142+
ParamSVMAllocationLaunchKernelTests, SharedSystemMemoryLaunchKernelTests,
143+
testing::Combine(testing::Values(std::make_pair(true, false),
144+
std::make_pair(false, true),
145+
std::make_pair(true, true)),
146+
testing::Bool(), testing::Bool(),
147+
testing::Values(0x80u, 0x1000u, 0x1800u, 0x10'0000u,
148+
0x10'0800u, 0x4000'0000u, 0x4000'0800u)),
149+
SharedSystemMemoryTestsNameSuffix());
150+
151+
class SharedSystemMemoryLaunchCooperativeKernelTests
152+
: public SharedSystemMemoryTests {};
153+
154+
LZT_TEST_P(
155+
SharedSystemMemoryLaunchCooperativeKernelTests,
156+
GivenSharedSystemMemoryAllocationsAsKernelArgumentsWhenCooperativeKernelExecutesThenValueIsCorrect) {
157+
int ordinal = -1;
158+
auto command_queue_group_properties =
159+
lzt::get_command_queue_group_properties(device);
160+
for (int i = 0; i < command_queue_group_properties.size(); i++) {
161+
if (command_queue_group_properties[i].flags &
162+
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS) {
163+
ordinal = i;
164+
break;
165+
}
166+
}
167+
if (ordinal < 0) {
168+
LOG_WARNING << "No command queues that support cooperative kernels";
169+
GTEST_SKIP();
170+
}
171+
172+
const bool is_dst_shared_system = std::get<0>(GetParam()).first;
173+
const bool is_src_shared_system = std::get<0>(GetParam()).second;
174+
const bool use_atomic_kernel = std::get<1>(GetParam());
175+
const bool use_immediate_cmdlist = std::get<2>(GetParam());
176+
const size_t buffer_size = std::get<3>(GetParam());
177+
const size_t num_elements = buffer_size / sizeof(int);
178+
LOG_INFO << "Num elements: " << num_elements;
179+
180+
auto compute_properties = lzt::get_compute_properties(device);
181+
182+
void *input = lzt::allocate_shared_memory_with_allocator_selector(
183+
buffer_size, 1, 0, 0, device, is_src_shared_system);
184+
185+
int *input_as_int = reinterpret_cast<int *>(input);
186+
for (size_t i = 0; i < num_elements; i++) {
187+
input_as_int[i] = 1;
188+
}
189+
190+
ze_module_handle_t module =
191+
lzt::create_module(device, "cooperative_reduction.spv");
192+
const char *function_name = use_atomic_kernel ? "cooperative_reduction_atomic"
193+
: "cooperative_reduction";
194+
ze_kernel_handle_t function = lzt::create_function(module, function_name);
195+
196+
uint32_t max_coop_group_count = 1;
197+
lzt::suggest_max_cooperative_group_count(function, max_coop_group_count);
198+
ASSERT_GT(max_coop_group_count, 0);
199+
200+
uint32_t suggested_group_count = [](uint32_t n) {
201+
n |= n >> 1;
202+
n |= n >> 2;
203+
n |= n >> 4;
204+
n |= n >> 8;
205+
n |= n >> 16;
206+
return n - (n >> 1);
207+
}(max_coop_group_count);
208+
209+
uint32_t group_count = (num_elements < suggested_group_count)
210+
? num_elements
211+
: suggested_group_count;
212+
LOG_INFO << "Group count: " << group_count;
213+
214+
void *output = lzt::allocate_shared_memory_with_allocator_selector(
215+
group_count * sizeof(int), 1, 0, 0, device, is_dst_shared_system);
216+
217+
uint32_t group_size = num_elements / group_count;
218+
LOG_INFO << "Group size: " << group_size;
219+
ASSERT_LE(group_size, compute_properties.maxGroupSizeX);
220+
221+
lzt::set_group_size(function, group_size, 1, 1);
222+
lzt::set_argument_value(function, 0, sizeof(input), &input);
223+
lzt::set_argument_value(function, 1, sizeof(output), &output);
224+
lzt::set_argument_value(function, 2, group_size * sizeof(int), nullptr);
225+
226+
lzt::zeCommandBundle cmd_bundle = lzt::create_command_bundle(
227+
lzt::get_default_context(), device, 0, ordinal, use_immediate_cmdlist);
228+
229+
ze_group_count_t thread_group_dimensions = {group_count, 1, 1};
230+
lzt::append_launch_cooperative_function(
231+
cmd_bundle.list, function, &thread_group_dimensions, nullptr, 0, nullptr);
232+
233+
lzt::close_command_list(cmd_bundle.list);
234+
lzt::execute_and_sync_command_bundle(cmd_bundle, UINT64_MAX);
235+
236+
int *result = reinterpret_cast<int *>(output);
237+
EXPECT_EQ(result[0], num_elements);
238+
239+
lzt::destroy_command_bundle(cmd_bundle);
240+
lzt::destroy_function(function);
241+
lzt::destroy_module(module);
242+
243+
lzt::free_memory_with_allocator_selector(output, is_dst_shared_system);
244+
lzt::free_memory_with_allocator_selector(input, is_src_shared_system);
245+
}
246+
247+
INSTANTIATE_TEST_SUITE_P(
248+
ParamSVMAllocationLaunchCooperativeKernelTests,
249+
SharedSystemMemoryLaunchCooperativeKernelTests,
135250
testing::Combine(testing::Values(std::make_pair(true, false),
136251
std::make_pair(false, true),
137252
std::make_pair(true, true)),
138253
testing::Bool(), testing::Bool(),
139-
testing::Values(0x80u, 0x1000u, 0x1800u, 0x100000u,
140-
0x100800u, 0x4000'0000u, 0x4000'0800u)),
254+
testing::Values(0x80u, 0x1000u, 0x1800u, 0x1'0000u,
255+
0x1'0800u)),
141256
SharedSystemMemoryTestsNameSuffix());

scripts/level_zero_report_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def assign_test_feature_tag(test_feature: str, test_name: str, test_section: str
5858
(test_name.find("Thread") != -1) or \
5959
(test_name.find("Affinity") != -1) or \
6060
(test_name.find("Luid") != -1) or \
61-
(test_name.find("ParamSVMAllocationTests_SharedSystemMemoryTests") != -1) or \
61+
(test_name.find("SharedSystemMemoryLaunchKernelTests") != -1) or \
62+
(test_name.find("SharedSystemMemoryLaunchCooperativeKernelTests") != -1) or \
6263
(re.search('concurrent', test_name, re.IGNORECASE)) or \
6364
(re.search('context', test_name, re.IGNORECASE)) or \
6465
(re.search('KernelOffset', test_name, re.IGNORECASE)) or \

utils/test_harness/include/test_harness/test_harness_cmdlist.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ void append_launch_function(ze_command_list_handle_t hCommandList,
160160
ze_event_handle_t hSignalEvent,
161161
uint32_t numWaitEvents,
162162
ze_event_handle_t *phWaitEvents);
163+
void append_launch_cooperative_function(ze_command_list_handle_t hCommandList,
164+
ze_kernel_handle_t hFunction,
165+
const ze_group_count_t *pLaunchFuncArgs,
166+
ze_event_handle_t hSignalEvent,
167+
uint32_t numWaitEvents,
168+
ze_event_handle_t *phWaitEvents);
163169
void append_signal_event(ze_command_list_handle_t hCommandList,
164170
ze_event_handle_t hEvent);
165171
void append_wait_on_events(ze_command_list_handle_t hCommandList,

utils/test_harness/include/test_harness/test_harness_module.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ void suggest_group_size(ze_kernel_handle_t hFunction, uint32_t globalSizeX,
6565
uint32_t globalSizeY, uint32_t globalSizeZ,
6666
uint32_t &groupSizeX, uint32_t &groupSizeY,
6767
uint32_t &groupSizeZ);
68+
void suggest_max_cooperative_group_count(ze_kernel_handle_t hFunction,
69+
uint32_t &max_cooperative_group_count);
6870
void set_group_size(ze_kernel_handle_t hFunction, uint32_t groupSizeX,
6971
uint32_t groupSizeY, uint32_t groupSizeZ);
7072
ze_kernel_handle_t create_function(ze_module_handle_t module,

utils/test_harness/src/test_harness_cmdlist.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,31 @@ void append_launch_function(ze_command_list_handle_t hCommandList,
450450
}
451451
}
452452

453+
void append_launch_cooperative_function(ze_command_list_handle_t hCommandList,
454+
ze_kernel_handle_t hFunction,
455+
const ze_group_count_t *pLaunchFuncArgs,
456+
ze_event_handle_t hSignalEvent,
457+
uint32_t numWaitEvents,
458+
ze_event_handle_t *phWaitEvents) {
459+
auto command_list_initial = hCommandList;
460+
auto function_initial = hFunction;
461+
auto signal_event_initial = hSignalEvent;
462+
std::vector<ze_event_handle_t> wait_events_initial(numWaitEvents);
463+
if (phWaitEvents) {
464+
std::memcpy(wait_events_initial.data(), phWaitEvents,
465+
sizeof(ze_event_handle_t) * numWaitEvents);
466+
}
467+
EXPECT_ZE_RESULT_SUCCESS(zeCommandListAppendLaunchCooperativeKernel(
468+
hCommandList, hFunction, pLaunchFuncArgs, hSignalEvent, numWaitEvents,
469+
phWaitEvents));
470+
EXPECT_EQ(hCommandList, command_list_initial);
471+
EXPECT_EQ(hFunction, function_initial);
472+
EXPECT_EQ(hSignalEvent, signal_event_initial);
473+
for (int i = 0; i < numWaitEvents && phWaitEvents; i++) {
474+
EXPECT_EQ(phWaitEvents[i], wait_events_initial[i]);
475+
}
476+
}
477+
453478
void append_signal_event(ze_command_list_handle_t hCommandList,
454479
ze_event_handle_t hEvent) {
455480
auto command_list_initial = hCommandList;

utils/test_harness/src/test_harness_module.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,14 @@ void suggest_group_size(ze_kernel_handle_t hFunction, uint32_t globalSizeX,
232232
EXPECT_EQ(hFunction, function_initial);
233233
}
234234

235+
void suggest_max_cooperative_group_count(
236+
ze_kernel_handle_t hFunction, uint32_t &max_cooperative_group_count) {
237+
auto function_initial = hFunction;
238+
EXPECT_ZE_RESULT_SUCCESS(zeKernelSuggestMaxCooperativeGroupCount(
239+
hFunction, &max_cooperative_group_count));
240+
EXPECT_EQ(hFunction, function_initial);
241+
}
242+
235243
void destroy_module(ze_module_handle_t module) {
236244
EXPECT_ZE_RESULT_SUCCESS(zeModuleDestroy(module));
237245
}

0 commit comments

Comments
 (0)