@@ -35,20 +35,26 @@ void executeGpuKernelAndValidate(ze_context_handle_t &context,
3535 ze_module_handle_t &module ,
3636 ze_kernel_handle_t &kernel,
3737 bool &outputValidationSuccessful,
38- bool useImmediateCommandList) {
39- ze_command_queue_handle_t cmdQueue;
40- ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
41- ze_command_list_handle_t cmdList;
42- ze_event_pool_handle_t eventPool;
38+ bool useImmediateCommandList,
39+ bool useAsync,
40+ int allocFlagValue) {
41+ ze_command_queue_handle_t cmdQueue = nullptr ;
42+ ze_command_list_handle_t cmdList = nullptr ;
43+ ze_event_pool_handle_t eventPool = nullptr ;
4344 ze_event_handle_t event = nullptr ;
4445
46+ ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
4547 cmdQueueDesc.ordinal = getCommandQueueOrdinal (device);
4648 cmdQueueDesc.index = 0 ;
47- cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
49+ if (useAsync) {
50+ cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
51+ createEventPoolAndEvents (context, device, eventPool, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1 , &event, ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST);
52+ } else {
53+ cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
54+ }
4855
4956 if (useImmediateCommandList) {
5057 SUCCESS_OR_TERMINATE (zeCommandListCreateImmediate (context, device, &cmdQueueDesc, &cmdList));
51- createEventPoolAndEvents (context, device, eventPool, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1 , &event, ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST);
5258 } else {
5359 SUCCESS_OR_TERMINATE (zeCommandQueueCreate (context, device, &cmdQueueDesc, &cmdQueue));
5460 SUCCESS_OR_TERMINATE (createCommandList (context, device, cmdList));
@@ -64,12 +70,10 @@ void executeGpuKernelAndValidate(ze_context_handle_t &context,
6470 uint32_t srcMemorySize = expectedMemorySize * srcAdditionalMul;
6571 uint32_t idxMemorySize = arraySize * sizeof (uint32_t );
6672
67- ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
68- deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
69- deviceDesc.ordinal = 0 ;
70-
7173 ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
72- hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
74+ if (allocFlagValue != 0 ) {
75+ hostDesc.flags = static_cast <ze_host_mem_alloc_flag_t >(allocFlagValue);
76+ }
7377
7478 void *srcBuffer = nullptr ;
7579 SUCCESS_OR_TERMINATE (zeMemAllocHost (context, &hostDesc, srcMemorySize, 1 , &srcBuffer));
@@ -122,13 +126,14 @@ void executeGpuKernelAndValidate(ze_context_handle_t &context,
122126 event,
123127 0 , nullptr ));
124128
125- if (useImmediateCommandList) {
126- SUCCESS_OR_TERMINATE (zeEventHostSynchronize (event, std::numeric_limits<uint64_t >::max ()));
127- } else {
129+ if (!useImmediateCommandList) {
128130 // Close list and submit for execution
129131 SUCCESS_OR_TERMINATE (zeCommandListClose (cmdList));
130132 SUCCESS_OR_TERMINATE (zeCommandQueueExecuteCommandLists (cmdQueue, 1 , &cmdList, nullptr ));
131- SUCCESS_OR_TERMINATE (zeCommandQueueSynchronize (cmdQueue, std::numeric_limits<uint64_t >::max ()));
133+ }
134+
135+ if (useAsync) {
136+ SUCCESS_OR_TERMINATE (zeEventHostSynchronize (event, std::numeric_limits<uint64_t >::max ()));
132137 }
133138
134139 // Validate
@@ -141,7 +146,9 @@ void executeGpuKernelAndValidate(ze_context_handle_t &context,
141146 if (srcCharBuffer[i] != dstCharBuffer[i]) {
142147 std::cout << " srcBuffer[" << i << " ] = " << static_cast <unsigned int >(srcCharBuffer[i]) << " not equal to "
143148 << " dstBuffer[" << i << " ] = " << static_cast <unsigned int >(dstCharBuffer[i]) << " \n " ;
144- break ;
149+ if (!verbose) {
150+ break ;
151+ }
145152 }
146153 }
147154 }
@@ -152,10 +159,11 @@ void executeGpuKernelAndValidate(ze_context_handle_t &context,
152159 SUCCESS_OR_TERMINATE (zeMemFree (context, idxBuffer));
153160 SUCCESS_OR_TERMINATE (zeMemFree (context, expectedMemory));
154161 SUCCESS_OR_TERMINATE (zeCommandListDestroy (cmdList));
155- if (useImmediateCommandList ) {
162+ if (useAsync ) {
156163 SUCCESS_OR_TERMINATE (zeEventDestroy (event));
157164 SUCCESS_OR_TERMINATE (zeEventPoolDestroy (eventPool));
158- } else {
165+ }
166+ if (!useImmediateCommandList) {
159167 SUCCESS_OR_TERMINATE (zeCommandQueueDestroy (cmdQueue));
160168 }
161169}
@@ -218,6 +226,8 @@ int main(int argc, char *argv[]) {
218226 verbose = isVerbose (argc, argv);
219227 bool aubMode = isAubMode (argc, argv);
220228 bool immediateFirst = isImmediateFirst (argc, argv);
229+ bool useAsync = isAsyncQueueEnabled (argc, argv);
230+ int allocFlagValue = getAllocationFlag (argc, argv, 0 );
221231
222232 ze_context_handle_t context = nullptr ;
223233 auto devices = zelloInitContextAndGetDevices (context);
@@ -246,13 +256,13 @@ int main(int argc, char *argv[]) {
246256 }
247257 };
248258
249- executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst);
259+ executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst, useAsync, allocFlagValue );
250260 caseName = selectCaseName (immediateFirst);
251261 printResult (aubMode, outputValidationSuccessful, blackBoxName, caseName);
252262
253263 if (outputValidationSuccessful || aubMode) {
254264 immediateFirst = !immediateFirst;
255- executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst);
265+ executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst, useAsync, allocFlagValue );
256266 caseName = selectCaseName (immediateFirst);
257267 printResult (aubMode, outputValidationSuccessful, blackBoxName, caseName);
258268 }
0 commit comments