@@ -939,51 +939,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
939939 // Create a command-list to signal RetEvent on completion
940940 ur_command_list_ptr_t SignalCommandList{};
941941 if (Event) {
942+ ur_event_handle_t SyncEvent = CommandBuffer->SignalEvent ;
942943 UR_CALL (Queue->Context ->getAvailableCommandList (Queue, SignalCommandList,
943944 false , false ));
944945
945946 UR_CALL (createEventAndAssociateQueue (Queue, &RetEvent,
946947 UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
947948 SignalCommandList, false ));
948949
949- ZE2UR_CALL (zeCommandListAppendBarrier,
950- (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
951- &(CommandBuffer->SignalEvent ->ZeEvent )));
952-
953950 if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
954951 // Multiple submissions of a command buffer imply that we need to save
955952 // the event timestamps before resubmitting the command buffer. We
956953 // therefore copy these timestamps into a dedicated USM memory section
957954 // before completing the command buffer execution, and then attach this
958955 // memory to the event returned to users to allow the profiling
959956 // engine to recover these timestamps.
960- ur_usm_desc_t USMDesc{};
961- ur_usm_device_desc_t UsmDeviceDesc{};
962- UsmDeviceDesc.stype = UR_STRUCTURE_TYPE_USM_DEVICE_DESC;
963- ur_usm_host_desc_t UsmHostDesc{};
964- UsmHostDesc.stype = UR_STRUCTURE_TYPE_USM_HOST_DESC;
965- UsmDeviceDesc.pNext = &UsmHostDesc;
966- USMDesc.pNext = &UsmDeviceDesc;
967- USMDesc.align = 4 ; // 4byte-aligned
968-
969- size_t Size = WaitEventList.size () * sizeof (ze_kernel_timestamp_result_t );
957+ UR_CALL (createEventAndAssociateQueue (
958+ Queue, &SyncEvent, UR_COMMAND_USM_MEMCPY, SignalCommandList, false ));
970959
971- struct command_buffer_profiling_t *Profiling =
972- new command_buffer_profiling_t ();
960+ command_buffer_profiling_t *Profiling = new command_buffer_profiling_t ();
973961
974962 Profiling->NumEvents = WaitEventList.size ();
975-
976- urUSMSharedAlloc (RetEvent->Context , CommandBuffer->Device , &USMDesc,
977- nullptr , Size, (void **)&Profiling->Timestamps );
963+ Profiling->Timestamps =
964+ new ze_kernel_timestamp_result_t [Profiling->NumEvents ];
978965
979966 ZE2UR_CALL (zeCommandListAppendQueryKernelTimestamps,
980967 (SignalCommandList->first , WaitEventList.size (),
981- WaitEventList.data (), Profiling->Timestamps , 0 ,
982- RetEvent ->ZeEvent , 1 ,
968+ WaitEventList.data (), ( void *) Profiling->Timestamps , 0 ,
969+ SyncEvent ->ZeEvent , 1 ,
983970 &(CommandBuffer->SignalEvent ->ZeEvent )));
984971
985972 RetEvent->CommandData = static_cast <void *>(Profiling);
986973 }
974+
975+ ZE2UR_CALL (zeCommandListAppendBarrier,
976+ (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
977+ &(SyncEvent->ZeEvent )));
987978 }
988979
989980 // Execute our command-lists asynchronously
0 commit comments