@@ -84,8 +84,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
8484
8585 // Get a new command list to be used on this call
8686 ur_command_list_ptr_t CommandList{};
87- UR_CALL (Queue->Context ->getAvailableCommandList (Queue, CommandList,
88- UseCopyEngine ));
87+ UR_CALL (Queue->Context ->getAvailableCommandList (
88+ Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList ));
8989
9090 ze_event_handle_t ZeEvent = nullptr ;
9191 ur_event_handle_t InternalEvent;
@@ -256,7 +256,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
256256 // Get an arbitrary command-list in the queue.
257257 ur_command_list_ptr_t CmdList;
258258 UR_CALL (Queue->Context ->getAvailableCommandList (
259- Queue, CmdList, false /* UseCopyEngine=*/ , OkToBatch));
259+ Queue, CmdList, false /* UseCopyEngine=*/ , NumEventsInWaitList,
260+ EventWaitList, OkToBatch));
260261
261262 // Insert the barrier into the command-list and execute.
262263 UR_CALL (insertBarrierIntoCmdList (CmdList, TmpWaitList, *Event, IsInternal));
@@ -311,7 +312,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
311312 if (ZeQueue) {
312313 ur_command_list_ptr_t CmdList;
313314 UR_CALL (Queue->Context ->getAvailableCommandList (
314- Queue, CmdList, UseCopyEngine, OkToBatch, &ZeQueue));
315+ Queue, CmdList, UseCopyEngine, NumEventsInWaitList,
316+ EventWaitList, OkToBatch, &ZeQueue));
315317 CmdLists.push_back (CmdList);
316318 }
317319 }
@@ -324,7 +326,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
324326 // Get any available command list.
325327 ur_command_list_ptr_t CmdList;
326328 UR_CALL (Queue->Context ->getAvailableCommandList (
327- Queue, CmdList, false /* UseCopyEngine=*/ , OkToBatch));
329+ Queue, CmdList, false /* UseCopyEngine=*/ , NumEventsInWaitList,
330+ EventWaitList, OkToBatch));
328331 CmdLists.push_back (CmdList);
329332 }
330333
@@ -598,6 +601,7 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
598601 this ->Mutex );
599602
600603 if (!HostVisibleEvent) {
604+ this ->IsCreatingHostProxyEvent = true ;
601605 if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy)
602606 die (" getOrCreateHostVisibleEvent: missing host-visible event" );
603607
@@ -612,20 +616,26 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
612616
613617 ur_command_list_ptr_t CommandList{};
614618 UR_CALL (UrQueue->Context ->getAvailableCommandList (
615- UrQueue, CommandList, false /* UseCopyEngine */ , OkToBatch))
619+ UrQueue, CommandList, false /* UseCopyEngine */ , 0 , nullptr , OkToBatch))
616620
617621 // Create a "proxy" host-visible event.
618622 UR_CALL (createEventAndAssociateQueue (
619623 UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList,
620624 /* IsInternal */ false , /* IsMultiDevice */ false ,
621625 /* HostVisible */ true ));
622626
623- ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
624- (CommandList->first , 1 , &ZeEvent));
627+ if (this ->IsInnerBatchedEvent ) {
628+ ZE2UR_CALL (zeCommandListAppendBarrier,
629+ (CommandList->first , ZeEvent, 0 , nullptr ));
630+ } else {
631+ ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
632+ (CommandList->first , 1 , &ZeEvent));
633+ }
625634 ZE2UR_CALL (zeCommandListAppendSignalEvent,
626635 (CommandList->first , HostVisibleEvent->ZeEvent ));
627636
628637 UR_CALL (UrQueue->executeCommandList (CommandList, false , OkToBatch))
638+ this ->IsCreatingHostProxyEvent = false ;
629639 }
630640
631641 ZeHostVisibleEvent = HostVisibleEvent->ZeEvent ;
@@ -682,7 +692,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(
682692
683693 ze_event_handle_t ZeEvent = HostVisibleEvent->ZeEvent ;
684694 logger::debug (" ZeEvent = {}" , ur_cast<std::uintptr_t >(ZeEvent));
685- ZE2UR_CALL (zeHostSynchronize, (ZeEvent));
695+ // If this event was an inner batched event, then sync with
696+ // the Queue instead of waiting on the event.
697+ if (HostVisibleEvent->IsInnerBatchedEvent && Event->ZeBatchedQueue ) {
698+ ZE2UR_CALL (zeHostSynchronize, (Event->ZeBatchedQueue ));
699+ } else {
700+ ZE2UR_CALL (zeHostSynchronize, (ZeEvent));
701+ }
686702 Event->Completed = true ;
687703 }
688704 }
@@ -938,7 +954,12 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
938954 std::list<ur_event_handle_t > EventsToBeReleased;
939955 ur_queue_handle_t AssociatedQueue = nullptr ;
940956 {
941- std::scoped_lock<ur_shared_mutex> EventLock (Event->Mutex );
957+ // Try to lock the Event. If that fails while the Event is creating its host
958+ // proxy event, continue the cleanup unlocked; otherwise block on the lock.
959+ std::unique_lock<ur_shared_mutex> EventLock (Event->Mutex , std::try_to_lock);
960+ if (!EventLock.owns_lock () && !Event->IsCreatingHostProxyEvent ) {
961+ EventLock.lock ();
962+ }
942963 if (SetEventCompleted)
943964 Event->Completed = true ;
944965 // Exit early if the event was already cleaned up.
@@ -1324,8 +1345,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13241345 // Get a command list prior to acquiring an event lock.
13251346 // This prevents a potential deadlock with recursive
13261347 // event locks.
1327- UR_CALL (Queue->Context ->getAvailableCommandList (Queue, CommandList,
1328- false , true ));
1348+ UR_CALL (Queue->Context ->getAvailableCommandList (
1349+ Queue, CommandList, false , 0 , nullptr , true ));
13291350 }
13301351
13311352 std::shared_lock<ur_shared_mutex> Lock (EventList[I]->Mutex );
0 commit comments