@@ -1104,35 +1104,36 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11041104 uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
11051105 ur_event_handle_t phEvent) {
11061106
1107- // TODO: remove memory allocation
1107+ std::scoped_lock<ur_shared_mutex> Lock (hKernel-> Mutex );
11081108
11091109 // kernelMemObj contains kernel memory objects that
11101110 // UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ kernelArgs pointers point to
1111- std::vector<void *> kernelMemObj (numArgs, nullptr );
1112- std::vector<void *> kernelArgs (numArgs, nullptr );
1113- std::scoped_lock<ur_shared_mutex> Lock (hKernel->Mutex );
1111+ hKernel->kernelMemObj .reserve (numArgs);
1112+ hKernel->kernelMemObj .clear ();
1113+ hKernel->kernelArgs .reserve (numArgs);
1114+ hKernel->kernelArgs .clear ();
11141115
11151116 for (uint32_t argIndex = 0 ; argIndex < numArgs; argIndex++) {
11161117 switch (pArgs[argIndex].type ) {
11171118 case UR_EXP_KERNEL_ARG_TYPE_LOCAL:
1118- kernelArgs[argIndex] = (void *)&pArgs[argIndex].size ;
1119+ hKernel-> kernelArgs [argIndex] = (void *)&pArgs[argIndex].size ;
11191120 break ;
11201121 case UR_EXP_KERNEL_ARG_TYPE_VALUE:
1121- kernelArgs[argIndex] = (void *)pArgs[argIndex].value .value ;
1122+ hKernel-> kernelArgs [argIndex] = (void *)pArgs[argIndex].value .value ;
11221123 break ;
11231124 case UR_EXP_KERNEL_ARG_TYPE_POINTER:
1124- kernelArgs[argIndex] = (void *)&pArgs[argIndex].value .pointer ;
1125+ hKernel-> kernelArgs [argIndex] = (void *)&pArgs[argIndex].value .pointer ;
11251126 break ;
11261127 case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ:
11271128 // prepareForSubmission() will save zePtr in kernelMemObj[argIndex]
1128- kernelArgs[argIndex] = &kernelMemObj[argIndex];
1129+ hKernel-> kernelArgs [argIndex] = &hKernel-> kernelMemObj [argIndex];
11291130 UR_CALL (hKernel->addPendingMemoryAllocation (
11301131 {pArgs[argIndex].value .memObjTuple .hMem ,
11311132 ur_mem_buffer_t ::device_access_mode_t ::read_write,
11321133 pArgs[argIndex].index }));
11331134 break ;
11341135 case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: {
1135- kernelArgs[argIndex] = &pArgs[argIndex].value .sampler ->ZeSampler ;
1136+ hKernel-> kernelArgs [argIndex] = &pArgs[argIndex].value .sampler ->ZeSampler ;
11361137 break ;
11371138 }
11381139 default :
@@ -1167,7 +1168,7 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11671168
11681169 UR_CALL (hKernel->prepareForSubmission (
11691170 hContext.get (), hDevice.get (), pGlobalWorkOffset, workDim, WG[0 ], WG[1 ],
1170- WG[2 ], getZeCommandList (), waitListView, &kernelArgs));
1171+ WG[2 ], getZeCommandList (), waitListView, &hKernel-> kernelArgs ));
11711172
11721173 {
11731174 TRACK_SCOPE_LATENCY (" ur_command_list_manager::"
@@ -1176,7 +1177,7 @@ ur_result_t ur_command_list_manager::appendKernelLaunchWithArgsExpNew(
11761177 ->ZeCommandListAppendLaunchKernelWithArgumentsExt
11771178 .zeCommandListAppendLaunchKernelWithArguments ,
11781179 (getZeCommandList (), hZeKernel, zeThreadGroupDimensions,
1179- groupSize, kernelArgs.data (), nullptr , zeSignalEvent,
1180+ groupSize, hKernel-> kernelArgs .data (), nullptr , zeSignalEvent,
11801181 waitListView.num , waitListView.handles ));
11811182 }
11821183
0 commit comments