@@ -30,16 +30,29 @@ scratch_kernel(__global int *resIdx, global TYPE *src, global TYPE *dst) {
3030}
3131)===" ;
3232
33- void executeGpuKernelAndValidate (ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) {
33+ void executeGpuKernelAndValidate (ze_context_handle_t &context,
34+ ze_device_handle_t &device,
35+ ze_module_handle_t &module ,
36+ ze_kernel_handle_t &kernel,
37+ bool &outputValidationSuccessful,
38+ bool useImmediateCommandList) {
3439 ze_command_queue_handle_t cmdQueue;
3540 ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
3641 ze_command_list_handle_t cmdList;
42+ ze_event_pool_handle_t eventPool;
43+ ze_event_handle_t event = nullptr ;
3744
3845 cmdQueueDesc.ordinal = getCommandQueueOrdinal (device);
3946 cmdQueueDesc.index = 0 ;
4047 cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
41- SUCCESS_OR_TERMINATE (zeCommandQueueCreate (context, device, &cmdQueueDesc, &cmdQueue));
42- SUCCESS_OR_TERMINATE (createCommandList (context, device, cmdList));
48+
49+ if (useImmediateCommandList) {
50+ SUCCESS_OR_TERMINATE (zeCommandListCreateImmediate (context, device, &cmdQueueDesc, &cmdList));
51+ createEventPoolAndEvents (context, device, eventPool, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, 1 , &event, ZE_EVENT_SCOPE_FLAG_HOST, ZE_EVENT_SCOPE_FLAG_HOST);
52+ } else {
53+ SUCCESS_OR_TERMINATE (zeCommandQueueCreate (context, device, &cmdQueueDesc, &cmdQueue));
54+ SUCCESS_OR_TERMINATE (createCommandList (context, device, cmdList));
55+ }
4356
4457 // Create two shared buffers
4558 uint32_t arraySize = 32 ;
@@ -90,16 +103,74 @@ void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t
90103 }
91104 }
92105
106+ uint32_t groupSizeX = arraySize;
107+ uint32_t groupSizeY = 1u ;
108+ uint32_t groupSizeZ = 1u ;
109+ SUCCESS_OR_TERMINATE (zeKernelSuggestGroupSize (kernel, groupSizeX, 1U , 1U , &groupSizeX, &groupSizeY, &groupSizeZ));
110+ SUCCESS_OR_TERMINATE (zeKernelSetGroupSize (kernel, groupSizeX, groupSizeY, groupSizeZ));
111+
112+ SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 2 , sizeof (dstBuffer), &dstBuffer));
113+ SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 1 , sizeof (srcBuffer), &srcBuffer));
114+ SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 0 , sizeof (idxBuffer), &idxBuffer));
115+
116+ ze_group_count_t dispatchTraits;
117+ dispatchTraits.groupCountX = 1u ;
118+ dispatchTraits.groupCountY = 1u ;
119+ dispatchTraits.groupCountZ = 1u ;
120+
121+ SUCCESS_OR_TERMINATE (zeCommandListAppendLaunchKernel (cmdList, kernel, &dispatchTraits,
122+ event,
123+ 0 , nullptr ));
124+
125+ if (useImmediateCommandList) {
126+ SUCCESS_OR_TERMINATE (zeEventHostSynchronize (event, std::numeric_limits<uint64_t >::max ()));
127+ } else {
128+ // Close list and submit for execution
129+ SUCCESS_OR_TERMINATE (zeCommandListClose (cmdList));
130+ SUCCESS_OR_TERMINATE (zeCommandQueueExecuteCommandLists (cmdQueue, 1 , &cmdList, nullptr ));
131+ SUCCESS_OR_TERMINATE (zeCommandQueueSynchronize (cmdQueue, std::numeric_limits<uint64_t >::max ()));
132+ }
133+
134+ // Validate
135+ outputValidationSuccessful = true ;
136+ if (memcmp (dstBuffer, expectedMemory, expectedMemorySize)) {
137+ outputValidationSuccessful = false ;
138+ uint8_t *srcCharBuffer = static_cast <uint8_t *>(expectedMemory);
139+ uint8_t *dstCharBuffer = static_cast <uint8_t *>(dstBuffer);
140+ for (size_t i = 0 ; i < expectedMemorySize; i++) {
141+ if (srcCharBuffer[i] != dstCharBuffer[i]) {
142+ std::cout << " srcBuffer[" << i << " ] = " << static_cast <unsigned int >(srcCharBuffer[i]) << " not equal to "
143+ << " dstBuffer[" << i << " ] = " << static_cast <unsigned int >(dstCharBuffer[i]) << " \n " ;
144+ break ;
145+ }
146+ }
147+ }
148+
149+ // Cleanup
150+ SUCCESS_OR_TERMINATE (zeMemFree (context, dstBuffer));
151+ SUCCESS_OR_TERMINATE (zeMemFree (context, srcBuffer));
152+ SUCCESS_OR_TERMINATE (zeMemFree (context, idxBuffer));
153+ SUCCESS_OR_TERMINATE (zeMemFree (context, expectedMemory));
154+ SUCCESS_OR_TERMINATE (zeCommandListDestroy (cmdList));
155+ if (useImmediateCommandList) {
156+ SUCCESS_OR_TERMINATE (zeEventDestroy (event));
157+ SUCCESS_OR_TERMINATE (zeEventPoolDestroy (eventPool));
158+ } else {
159+ SUCCESS_OR_TERMINATE (zeCommandQueueDestroy (cmdQueue));
160+ }
161+ }
162+
163+ void createModuleKernel (ze_context_handle_t &context,
164+ ze_device_handle_t &device,
165+ ze_module_handle_t &module ,
166+ ze_kernel_handle_t &kernel) {
93167 std::string buildLog;
94168 auto spirV = compileToSpirV (moduleSrc, " " , buildLog);
95169 if (buildLog.size () > 0 ) {
96170 std::cout << " Build log " << buildLog;
97171 }
98172 SUCCESS_OR_TERMINATE ((0 == spirV.size ()));
99173
100- ze_module_handle_t module = nullptr ;
101- ze_kernel_handle_t kernel = nullptr ;
102-
103174 ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
104175 ze_module_build_log_handle_t buildlog;
105176 moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
@@ -130,61 +201,25 @@ void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t
130201 ze_kernel_properties_t kernelProperties{ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES};
131202 SUCCESS_OR_TERMINATE (zeKernelGetProperties (kernel, &kernelProperties));
132203 std::cout << " Scratch size = " << kernelProperties.spillMemSize << " \n " ;
204+ }
133205
134- uint32_t groupSizeX = arraySize;
135- uint32_t groupSizeY = 1u ;
136- uint32_t groupSizeZ = 1u ;
137- SUCCESS_OR_TERMINATE (zeKernelSuggestGroupSize (kernel, groupSizeX, 1U , 1U , &groupSizeX, &groupSizeY, &groupSizeZ));
138- SUCCESS_OR_TERMINATE (zeKernelSetGroupSize (kernel, groupSizeX, groupSizeY, groupSizeZ));
139-
140- SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 2 , sizeof (dstBuffer), &dstBuffer));
141- SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 1 , sizeof (srcBuffer), &srcBuffer));
142- SUCCESS_OR_TERMINATE (zeKernelSetArgumentValue (kernel, 0 , sizeof (idxBuffer), &idxBuffer));
143-
144- ze_group_count_t dispatchTraits;
145- dispatchTraits.groupCountX = 1u ;
146- dispatchTraits.groupCountY = 1u ;
147- dispatchTraits.groupCountZ = 1u ;
148-
149- SUCCESS_OR_TERMINATE (zeCommandListAppendLaunchKernel (cmdList, kernel, &dispatchTraits,
150- nullptr , 0 , nullptr ));
151-
152- // Close list and submit for execution
153- SUCCESS_OR_TERMINATE (zeCommandListClose (cmdList));
154- SUCCESS_OR_TERMINATE (zeCommandQueueExecuteCommandLists (cmdQueue, 1 , &cmdList, nullptr ));
155-
156- SUCCESS_OR_TERMINATE (zeCommandQueueSynchronize (cmdQueue, std::numeric_limits<uint64_t >::max ()));
206+ inline bool isImmediateFirst (int argc, char *argv[]) {
207+ bool enabled = isParamEnabled (argc, argv, " -i" , " --immediate" );
157208
158- // Validate
159- outputValidationSuccessful = true ;
160- if (memcmp (dstBuffer, expectedMemory, expectedMemorySize)) {
161- outputValidationSuccessful = false ;
162- uint8_t *srcCharBuffer = static_cast <uint8_t *>(expectedMemory);
163- uint8_t *dstCharBuffer = static_cast <uint8_t *>(dstBuffer);
164- for (size_t i = 0 ; i < expectedMemorySize; i++) {
165- if (srcCharBuffer[i] != dstCharBuffer[i]) {
166- std::cout << " srcBuffer[" << i << " ] = " << static_cast <unsigned int >(srcCharBuffer[i]) << " not equal to "
167- << " dstBuffer[" << i << " ] = " << static_cast <unsigned int >(dstCharBuffer[i]) << " \n " ;
168- break ;
169- }
170- }
209+ if (verbose && enabled) {
210+ std::cerr << " Immediate Command List executed first" << std::endl;
171211 }
172212
173- // Cleanup
174- SUCCESS_OR_TERMINATE (zeMemFree (context, dstBuffer));
175- SUCCESS_OR_TERMINATE (zeMemFree (context, srcBuffer));
176- SUCCESS_OR_TERMINATE (zeMemFree (context, idxBuffer));
177- SUCCESS_OR_TERMINATE (zeMemFree (context, expectedMemory));
178- SUCCESS_OR_TERMINATE (zeCommandListDestroy (cmdList));
179- SUCCESS_OR_TERMINATE (zeCommandQueueDestroy (cmdQueue));
213+ return enabled;
180214}
181215
182216int main (int argc, char *argv[]) {
183217 const std::string blackBoxName = " Zello Scratch" ;
184218 verbose = isVerbose (argc, argv);
185- ze_context_handle_t context = nullptr ;
186219 bool aubMode = isAubMode (argc, argv);
220+ bool immediateFirst = isImmediateFirst (argc, argv);
187221
222+ ze_context_handle_t context = nullptr ;
188223 auto devices = zelloInitContextAndGetDevices (context);
189224 auto device = devices[0 ];
190225 bool outputValidationSuccessful;
@@ -193,11 +228,39 @@ int main(int argc, char *argv[]) {
193228 SUCCESS_OR_TERMINATE (zeDeviceGetProperties (device, &deviceProperties));
194229 printDeviceProperties (deviceProperties);
195230
196- executeGpuKernelAndValidate (context, device, outputValidationSuccessful);
231+ ze_module_handle_t module = nullptr ;
232+ ze_kernel_handle_t kernel = nullptr ;
233+
234+ createModuleKernel (context, device, module , kernel);
235+
236+ const std::string regularCaseName = " Regular Command List" ;
237+ const std::string immediateCaseName = " Immediate Command List" ;
238+
239+ std::string caseName;
240+
241+ auto selectCaseName = [&regularCaseName, &immediateCaseName](bool immediate) {
242+ if (immediate) {
243+ return immediateCaseName;
244+ } else {
245+ return regularCaseName;
246+ }
247+ };
248+
249+ executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst);
250+ caseName = selectCaseName (immediateFirst);
251+ printResult (aubMode, outputValidationSuccessful, blackBoxName, caseName);
252+
253+ if (outputValidationSuccessful || aubMode) {
254+ immediateFirst = !immediateFirst;
255+ executeGpuKernelAndValidate (context, device, module , kernel, outputValidationSuccessful, immediateFirst);
256+ caseName = selectCaseName (immediateFirst);
257+ printResult (aubMode, outputValidationSuccessful, blackBoxName, caseName);
258+ }
197259
260+ SUCCESS_OR_TERMINATE (zeKernelDestroy (kernel));
261+ SUCCESS_OR_TERMINATE (zeModuleDestroy (module ));
198262 SUCCESS_OR_TERMINATE (zeContextDestroy (context));
199263
200- printResult (aubMode, outputValidationSuccessful, blackBoxName);
201264 outputValidationSuccessful = aubMode ? true : outputValidationSuccessful;
202265 return (outputValidationSuccessful ? 0 : 1 );
203266}
0 commit comments