@@ -93,7 +93,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
9393 const ur_exp_command_buffer_desc_t *Desc)
9494 : Context(Context), Device(Device), ZeCommandList(CommandList),
9595 ZeCommandListDesc(ZeDesc), QueueProperties(), SyncPoints(),
96- NextSyncPoint(0 ), CommandListMap () {
96+ NextSyncPoint(0 ), ZeFencesList () {
9797 (void )Desc;
9898 urContextRetain (Context);
9999 urDeviceRetain (Device);
@@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
132132 }
133133
134134 // Release Fences allocated to command_buffer
135- for (auto it = CommandListMap.begin (); it != CommandListMap.end (); ++it) {
136- if (it->second .ZeFence != nullptr ) {
137- ZE_CALL_NOCHECK (zeFenceDestroy, (it->second .ZeFence ));
138- }
135+ for (auto &ZeFence : ZeFencesList) {
136+ ZE_CALL_NOCHECK (zeFenceDestroy, (ZeFence));
139137 }
140138}
141139
@@ -418,7 +416,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
418416 ZE2UR_CALL (
419417 zeCommandListAppendBarrier,
420418 (ZeCommandList, nullptr , 1 , &RetCommandBuffer->WaitEvent ->ZeEvent ));
421-
422419 return UR_RESULT_SUCCESS;
423420}
424421
@@ -687,12 +684,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
687684 ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
688685 uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
689686 ur_event_handle_t *Event) {
690- // There are issues with immediate command lists so return an error if the
691- // queue is in that mode.
692- if (Queue->UsingImmCmdLists ) {
693- return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
694- }
695-
696687 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
697688 // Use compute engine rather than copy engine
698689 const auto UseCopyEngine = false ;
@@ -702,78 +693,68 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
702693
703694 ze_fence_handle_t ZeFence;
704695 ZeStruct<ze_fence_desc_t > ZeFenceDesc;
705- ur_command_list_ptr_t CommandListPtr;
706696
707697 ZE2UR_CALL (zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
708- // TODO: Refactor so requiring a map iterator is not required here, currently
709- // required for executeCommandList though.
710- ZeStruct<ze_command_queue_desc_t > ZeQueueDesc;
711- ZeQueueDesc.ordinal = QueueGroupOrdinal;
712- CommandListPtr = CommandBuffer->CommandListMap .insert (
713- std::pair<ze_command_list_handle_t , ur_command_list_info_t >(
714- CommandBuffer->ZeCommandList ,
715- {ZeFence, false , false , ZeCommandQueue, ZeQueueDesc}));
716-
717- // Previous execution will have closed the command list, we need to reopen
718- // it otherwise calling `executeCommandList` will return early.
719- CommandListPtr->second .IsClosed = false ;
720- CommandListPtr->second .ZeFenceInUse = true ;
698+ CommandBuffer->ZeFencesList .push_back (ZeFence);
721699
722700 // Create a command-list that executes before the command-buffer's main
723701 // command-list and signals when `EventWaitList` dependencies are complete.
724- ur_command_list_ptr_t WaitCommandList{} ;
702+ bool MustSignalWaitEvent = true ;
725703 if (NumEventsInWaitList) {
726704 _ur_ze_event_list_t TmpWaitList;
727705 UR_CALL (TmpWaitList.createAndRetainUrZeEventList (
728706 NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
729707
730- UR_CALL (Queue->Context ->getAvailableCommandList (Queue, WaitCommandList,
731- false , false ))
732-
733708 // Update the WaitList of the Wait Event
734709 // Events are appended to the WaitList if the WaitList is not empty
735710 if (CommandBuffer->WaitEvent ->WaitList .isEmpty ())
736711 CommandBuffer->WaitEvent ->WaitList = TmpWaitList;
737712 else
738713 CommandBuffer->WaitEvent ->WaitList .insert (TmpWaitList);
739714
740- ZE2UR_CALL (zeCommandListAppendBarrier,
741- (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
742- CommandBuffer->WaitEvent ->WaitList .Length ,
743- CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
744- } else {
745- UR_CALL (Queue->Context ->getAvailableCommandList (Queue, WaitCommandList,
746- false , false ));
715+ if (!CommandBuffer->WaitEvent ->WaitList .isEmpty ()) {
716+ ur_command_list_ptr_t WaitCommandList{};
717+ UR_CALL (Queue->Context ->getAvailableCommandList (Queue, WaitCommandList,
718+ false , false ))
719+
720+ ZE2UR_CALL (zeCommandListAppendBarrier,
721+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
722+ CommandBuffer->WaitEvent ->WaitList .Length ,
723+ CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
724+ Queue->executeCommandList (WaitCommandList, false , false );
725+ MustSignalWaitEvent = false ;
726+ }
727+ }
747728
748- ZE2UR_CALL (zeCommandListAppendSignalEvent,
749- (WaitCommandList-> first , CommandBuffer->WaitEvent ->ZeEvent ));
729+ if (MustSignalWaitEvent) {
730+ ZE2UR_CALL (zeEventHostSignal, ( CommandBuffer->WaitEvent ->ZeEvent ));
750731 }
751732
733+ // Submit main command-list. This command-list is of a batch command-list
734+ // type, regardless of the UR Queue type. We therefore need to submit the list
735+ // directly using the Level-Zero API to avoid type mismatches if using UR
736+ // functions.
737+ ZE2UR_CALL (zeCommandQueueExecuteCommandLists,
738+ (ZeCommandQueue, 1 , &CommandBuffer->ZeCommandList , ZeFence));
739+
752740 // Execution event for this enqueue of the UR command-buffer
753741 ur_event_handle_t RetEvent{};
754- // Create a command-list to signal RetEvent on completion
755- ur_command_list_ptr_t SignalCommandList{};
756742 if (Event) {
743+ // Create a command-list to signal RetEvent on completion
744+ ur_command_list_ptr_t SignalCommandList{};
757745 UR_CALL (Queue->Context ->getAvailableCommandList (Queue, SignalCommandList,
758746 false , false ));
759747
760748 UR_CALL (createEventAndAssociateQueue (Queue, &RetEvent,
761749 UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
762- SignalCommandList, false ));
750+ SignalCommandList, false , true ));
763751
764752 ZE2UR_CALL (zeCommandListAppendBarrier,
765753 (SignalCommandList->first , RetEvent->ZeEvent , 1 ,
766754 &(CommandBuffer->SignalEvent ->ZeEvent )));
755+ Queue->executeCommandList (SignalCommandList, false , false );
767756 }
768757
769- // Execution our command-lists asynchronously
770- // TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
771- // passing all three command-lists, rather than individual calls which
772- // introduces latency.
773- UR_CALL (Queue->executeCommandList (WaitCommandList, false , false ));
774- UR_CALL (Queue->executeCommandList (CommandListPtr, false , false ));
775- UR_CALL (Queue->executeCommandList (SignalCommandList, false , false ));
776-
777758 if (Event) {
778759 *Event = RetEvent;
779760 }
0 commit comments