Skip to content

Commit 492721a

Browse files
[4/n] L0 immediate commandlist improvements
Add black box test for immediate command list usage Related-To: LOCI-1988 Signed-off-by: Aravind Gopalakrishnan <[email protected]>
1 parent f1c50a8 commit 492721a

File tree

2 files changed

+284
-1
lines changed

2 files changed

+284
-1
lines changed

level_zero/core/test/black_box_tests/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2020-2021 Intel Corporation
2+
# Copyright (C) 2020-2022 Intel Corporation
33
#
44
# SPDX-License-Identifier: MIT
55
#
@@ -24,6 +24,7 @@ set(TEST_TARGETS
2424
zello_image_view
2525
zello_dynamic_link
2626
zello_events
27+
zello_immediate
2728
)
2829

2930
include_directories(common)
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/*
2+
* Copyright (C) 2022 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include <level_zero/ze_api.h>
9+
10+
#include "zello_common.h"
11+
12+
#include <fstream>
13+
#include <iomanip>
14+
#include <iostream>
15+
#include <limits>
16+
#include <memory>
17+
18+
// Global verbosity switch: main() assigns it from isVerbose(argc, argv), and
// createImmediateCommandList() prints the chosen queue mode when it is true.
bool verbose = false;
19+
20+
// Creates an immediate command list on `device` inside `context`.
//
// The queue descriptor handed to the driver targets queue group `queueGroupOrdinal`,
// queue index 0, normal priority and no flags. `syncMode` picks the queue mode:
// true -> ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, false -> ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS.
// The resulting handle is written to `cmdList`.
//
// NOTE(review): SUCCESS_OR_TERMINATE is defined outside this file (zello_common.h);
// presumably it aborts the test when the call does not succeed - confirm there.
void createImmediateCommandList(ze_device_handle_t &device,
                                ze_context_handle_t &context,
                                uint32_t queueGroupOrdinal,
                                bool syncMode,
                                ze_command_list_handle_t &cmdList) {
    ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    cmdQueueDesc.pNext = nullptr;
    cmdQueueDesc.ordinal = queueGroupOrdinal;
    cmdQueueDesc.index = 0;
    cmdQueueDesc.flags = 0;
    cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    cmdQueueDesc.mode = syncMode ? ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS
                                 : ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    if (verbose) {
        // Report which mode was selected before the list is created.
        const char *modeMessage = syncMode ? "Choosing Command Queue mode synchronous"
                                           : "Choosing Command Queue mode asynchronous";
        std::cout << modeMessage << std::endl;
    }

    SUCCESS_OR_TERMINATE(zeCommandListCreateImmediate(context, device, &cmdQueueDesc, &cmdList));
}
42+
43+
// Round-trips a buffer through device memory using an immediate command list:
// host USM allocation -> device USM allocation -> ordinary host memory, then compares
// the read-back bytes with the source bytes.
//
// context / device : Level Zero handles used for every allocation, event pool and list.
// syncMode         : true  -> the immediate list is created in synchronous mode and the
//                             copies are appended without any events.
//                    false -> asynchronous mode; the second copy waits on the event
//                             signalled by the first, and the host waits on the event
//                             signalled by the second before validating.
// validRet         : out-parameter; true when the read-back data matches the source.
//
// The list is created on the queue group returned by getCopyOnlyCommandQueueOrdinal()
// (declared outside this file).
void testCopyBetweenHostMemAndDeviceMem(ze_context_handle_t &context, ze_device_handle_t &device, bool syncMode, bool &validRet) {
    const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder
    char *hostBuffer = nullptr;
    void *deviceBuffer = nullptr;
    // Ordinary (non-USM) host memory used as the final copy destination. It is zero-filled
    // so stale contents cannot make the comparison pass, and it is released automatically.
    std::vector<char> stackBuffer(allocSize, 0);
    ze_command_list_handle_t cmdList = nullptr;

    uint32_t copyQueueGroup = getCopyOnlyCommandQueueOrdinal(device);

    createImmediateCommandList(device, context, copyQueueGroup, syncMode, cmdList);

    ze_host_mem_alloc_desc_t hostDesc = {};
    hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
    hostDesc.pNext = nullptr;
    hostDesc.flags = 0;
    SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, reinterpret_cast<void **>(&hostBuffer)));

    ze_device_mem_alloc_desc_t deviceDesc = {};
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    deviceDesc.ordinal = 0;
    deviceDesc.flags = 0;
    deviceDesc.pNext = nullptr;
    // The alignment argument must be zero or a power of two; allocSize (4103) is neither,
    // so request byte alignment exactly like the host allocation above.
    SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, 1, device, &deviceBuffer));

    // Fill the source with a non-constant pattern (wraps around every 256 bytes).
    for (size_t i = 0; i < allocSize; ++i) {
        hostBuffer[i] = static_cast<char>(i + 1);
    }

    // Create Events for synchronization
    ze_event_pool_handle_t eventPoolDevice, eventPoolHost;
    uint32_t numEvents = 2;
    std::vector<ze_event_handle_t> deviceEvents(numEvents), hostEvents(numEvents);
    createEventPoolAndEvents(context, device, eventPoolDevice,
                             (ze_event_pool_flag_t)(0),
                             numEvents, deviceEvents.data(),
                             ZE_EVENT_SCOPE_FLAG_SUBDEVICE,
                             (ze_event_scope_flag_t)0);
    createEventPoolAndEvents(context, device, eventPoolHost,
                             (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE),
                             numEvents, hostEvents.data(),
                             ZE_EVENT_SCOPE_FLAG_HOST,
                             (ze_event_scope_flag_t)0);

    // Copy from host-allocated to device-allocated memory
    SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, deviceBuffer, hostBuffer, allocSize,
                                                       syncMode ? nullptr : deviceEvents[0],
                                                       0, nullptr));

    // Copy from device-allocated memory back to ordinary host memory. In async mode this
    // copy waits for the first copy's event and signals the host-visible event.
    SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer.data(), deviceBuffer, allocSize,
                                                       syncMode ? nullptr : hostEvents[0],
                                                       syncMode ? 0 : 1,
                                                       syncMode ? nullptr : &deviceEvents[0]));

    if (!syncMode) {
        // If Async mode, use event for sync
        SUCCESS_OR_TERMINATE(zeEventHostSynchronize(hostEvents[0], std::numeric_limits<uint64_t>::max() - 1));
    }

    // Validate that the read-back buffer holds the original data from hostBuffer
    validRet = (0 == memcmp(hostBuffer, stackBuffer.data(), allocSize));

    for (auto event : hostEvents) {
        SUCCESS_OR_TERMINATE(zeEventDestroy(event));
    }
    for (auto event : deviceEvents) {
        SUCCESS_OR_TERMINATE(zeEventDestroy(event));
    }

    SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolHost));
    SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolDevice));
    SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer));
    SUCCESS_OR_TERMINATE(zeMemFree(context, deviceBuffer));
    SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
}
121+
122+
// Copies one shared allocation into another on the GPU through an immediate command
// list and verifies the destination matches the source.
//
// If "copy_buffer_to_buffer.spv" can be opened in the working directory, it is built as a
// SPIR-V module and its "CopyBufferToBufferBytes" kernel is launched; otherwise the copy
// is done with zeCommandListAppendMemoryCopy.
//
// context / device           : Level Zero handles used for allocations, module and list.
// syncMode                   : true  -> synchronous immediate list, no event is signalled.
//                              false -> asynchronous list; the host waits on hostEvents[0].
// outputValidationSuccessful : out-parameter; true when dstBuffer equals srcBuffer.
//
// The list is created on the queue group returned by getCommandQueueOrdinal()
// (declared outside this file).
void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool syncMode, bool &outputValidationSuccessful) {
    ze_command_list_handle_t cmdList = nullptr;

    uint32_t computeOrdinal = getCommandQueueOrdinal(device);
    createImmediateCommandList(device, context, computeOrdinal, syncMode, cmdList);

    // Create two shared buffers
    constexpr size_t allocSize = 4096;
    ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
    deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
    deviceDesc.ordinal = 0;

    ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
    hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;

    void *srcBuffer = nullptr;
    SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));

    void *dstBuffer = nullptr;
    SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));

    // Initialize memory
    constexpr uint8_t val = 55;
    memset(srcBuffer, val, allocSize);
    memset(dstBuffer, 0, allocSize);

    // Both stay nullptr on the fallback (no .spv file) path; cleanup checks for that.
    ze_module_handle_t module = nullptr;
    ze_kernel_handle_t kernel = nullptr;

    std::ifstream file("copy_buffer_to_buffer.spv", std::ios::binary);

    ze_event_pool_handle_t eventPoolHost;
    uint32_t numEvents = 2;
    std::vector<ze_event_handle_t> hostEvents(numEvents);
    createEventPoolAndEvents(context, device, eventPoolHost,
                             (ze_event_pool_flag_t)(ZE_EVENT_POOL_FLAG_HOST_VISIBLE),
                             numEvents, hostEvents.data(),
                             ZE_EVENT_SCOPE_FLAG_HOST,
                             (ze_event_scope_flag_t)0);

    if (file.is_open()) {
        // Read the whole SPIR-V binary into memory.
        file.seekg(0, file.end);
        auto length = file.tellg();
        file.seekg(0, file.beg);

        std::unique_ptr<char[]> spirvInput(new char[length]);
        file.read(spirvInput.get(), length);

        ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
        // Initialised so a failure that never produces a log cannot leave a garbage handle.
        ze_module_build_log_handle_t buildlog = nullptr;
        moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
        moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(spirvInput.get());
        moduleDesc.inputSize = length;
        moduleDesc.pBuildFlags = "";

        if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) {
            // Query the log size first, then fetch the text into a buffer of that size.
            size_t szLog = 0;
            if (buildlog != nullptr) {
                zeModuleBuildLogGetString(buildlog, &szLog, nullptr);
            }
            if (szLog > 0) {
                // One spare zeroed byte guarantees the text is NUL-terminated when printed.
                std::vector<char> strLog(szLog + 1, '\0');
                zeModuleBuildLogGetString(buildlog, &szLog, strLog.data());
                std::cout << "Build log:" << strLog.data() << std::endl;
            }
        }
        if (buildlog != nullptr) {
            SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog));
        }

        ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
        kernelDesc.pKernelName = "CopyBufferToBufferBytes";
        SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));

        // Start from a 32x1x1 guess and let the driver suggest a size for allocSize work items.
        uint32_t groupSizeX = 32u;
        uint32_t groupSizeY = 1u;
        uint32_t groupSizeZ = 1u;
        SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
        SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));

        // Argument 0 is the source pointer, argument 1 the destination pointer.
        // NOTE(review): arguments 2-4 are all set to 0; presumably offsets - confirm
        // against the kernel source.
        uint32_t offset = 0;
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
        SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));

        ze_group_count_t dispatchTraits;
        dispatchTraits.groupCountX = static_cast<uint32_t>(allocSize / groupSizeX);
        dispatchTraits.groupCountY = 1u;
        dispatchTraits.groupCountZ = 1u;

        SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
                                                             syncMode ? nullptr : hostEvents[0], 0, nullptr));
        file.close();
    } else {
        // Perform a GPU copy
        SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize,
                                                           syncMode ? nullptr : hostEvents[0], 0, nullptr));
    }

    if (!syncMode) {
        // If Async mode, use event for sync
        SUCCESS_OR_TERMINATE(zeEventHostSynchronize(hostEvents[0], std::numeric_limits<uint64_t>::max() - 1));
    }

    // Validate
    outputValidationSuccessful = true;
    if (memcmp(dstBuffer, srcBuffer, allocSize)) {
        outputValidationSuccessful = false;
        uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
        uint8_t *dstCharBuffer = static_cast<uint8_t *>(dstBuffer);
        // Report only the first mismatching byte.
        for (size_t i = 0; i < allocSize; i++) {
            if (srcCharBuffer[i] != dstCharBuffer[i]) {
                std::cout << "srcBuffer[" << i << "] = " << static_cast<unsigned int>(srcCharBuffer[i]) << " not equal to "
                          << "dstBuffer[" << i << "] = " << static_cast<unsigned int>(dstCharBuffer[i]) << "\n";
                break;
            }
        }
    }

    // Cleanup: release every object created above so nothing is still alive when the
    // caller destroys the context.
    for (auto event : hostEvents) {
        SUCCESS_OR_TERMINATE(zeEventDestroy(event));
    }
    SUCCESS_OR_TERMINATE(zeEventPoolDestroy(eventPoolHost));
    SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
    SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
    SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
    if (kernel != nullptr) {
        SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel));
    }
    if (module != nullptr) {
        SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
    }
}
248+
249+
int main(int argc, char *argv[]) {
250+
verbose = isVerbose(argc, argv);
251+
ze_context_handle_t context = nullptr;
252+
ze_driver_handle_t driverHandle = nullptr;
253+
auto devices = zelloInitContextAndGetDevices(context, driverHandle);
254+
auto device = devices[0];
255+
256+
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
257+
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
258+
std::cout << "Device : \n"
259+
<< " * name : " << deviceProperties.name << "\n"
260+
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
261+
262+
bool outputValidationSuccessful = true;
263+
if (outputValidationSuccessful) {
264+
//Sync mode with Compute queue
265+
std::cout << "Test case: Sync mode compute queue with Kernel launch \n";
266+
executeGpuKernelAndValidate(context, device, true, outputValidationSuccessful);
267+
}
268+
if (outputValidationSuccessful) {
269+
//Async mode with Compute queue
270+
std::cout << "\nTest case: Async mode compute queue with Kernel launch \n";
271+
executeGpuKernelAndValidate(context, device, false, outputValidationSuccessful);
272+
}
273+
if (outputValidationSuccessful) {
274+
//Sync mode with Copy queue
275+
std::cout << "\nTest case: Sync mode copy queue for memory copy\n";
276+
testCopyBetweenHostMemAndDeviceMem(context, device, true, outputValidationSuccessful);
277+
}
278+
279+
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
280+
std::cout << "\nZello Immediate Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
281+
return (outputValidationSuccessful ? 0 : 1);
282+
}

0 commit comments

Comments
 (0)