@@ -92,8 +92,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
9292 ZeStruct<ze_command_list_desc_t > ZeDesc,
9393 const ur_exp_command_buffer_desc_t *Desc)
9494 : Context(Context), Device(Device), ZeCommandList(CommandList),
95- ZeCommandListDesc(ZeDesc), QueueProperties (), SyncPoints (),
96- NextSyncPoint( 0 ), CommandListMap( ) {
95+ ZeCommandListDesc(ZeDesc), ZeFencesList (), QueueProperties (),
96+ SyncPoints( ), NextSyncPoint( 0 ) {
9797 (void )Desc;
9898 urContextRetain (Context);
9999 urDeviceRetain (Device);
@@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
132132 }
133133
134134 // Release Fences allocated to command_buffer
135- for (auto it = CommandListMap.begin (); it != CommandListMap.end (); ++it) {
136- if (it->second .ZeFence != nullptr ) {
137- ZE_CALL_NOCHECK (zeFenceDestroy, (it->second .ZeFence ));
138- }
135+ for (auto &ZeFence : ZeFencesList) {
136+ ZE_CALL_NOCHECK (zeFenceDestroy, (ZeFence));
139137 }
140138}
141139
@@ -464,7 +462,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
464462 ZE2UR_CALL (
465463 zeCommandListAppendBarrier,
466464 (ZeCommandList, nullptr , 1 , &RetCommandBuffer->WaitEvent ->ZeEvent ));
467-
468465 return UR_RESULT_SUCCESS;
469466}
470467
@@ -856,12 +853,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
856853 ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
857854 uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
858855 ur_event_handle_t *Event) {
859- // There are issues with immediate command lists so return an error if the
860- // queue is in that mode.
861- if (Queue->UsingImmCmdLists ) {
862- return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
863- }
864-
865856 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
866857 // Use compute engine rather than copy engine
867858 const auto UseCopyEngine = false ;
@@ -871,22 +862,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
871862
872863 ze_fence_handle_t ZeFence;
873864 ZeStruct<ze_fence_desc_t > ZeFenceDesc;
874- ur_command_list_ptr_t CommandListPtr;
875865
876866 ZE2UR_CALL (zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
877- // TODO: Refactor so requiring a map iterator is not required here, currently
878- // required for executeCommandList though.
879- ZeStruct<ze_command_queue_desc_t > ZeQueueDesc;
880- ZeQueueDesc.ordinal = QueueGroupOrdinal;
881- CommandListPtr = CommandBuffer->CommandListMap .insert (
882- std::pair<ze_command_list_handle_t , ur_command_list_info_t >(
883- CommandBuffer->ZeCommandList ,
884- {ZeFence, false , false , ZeCommandQueue, ZeQueueDesc}));
885-
886- // Previous execution will have closed the command list, we need to reopen
887- // it otherwise calling `executeCommandList` will return early.
888- CommandListPtr->second .IsClosed = false ;
889- CommandListPtr->second .ZeFenceInUse = true ;
867+ CommandBuffer->ZeFencesList .push_back (ZeFence);
890868
891869 // Create a command-list to execute before the command-buffer's main
892870 // command-list; it signals when `EventWaitList` dependencies are complete.
@@ -908,6 +886,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
908886 (WaitCommandList->first , ZeEvent));
909887 }
910888
889+ bool MustSignalWaitEvent = true ;
911890 if (NumEventsInWaitList) {
912891 _ur_ze_event_list_t TmpWaitList;
913892 UR_CALL (TmpWaitList.createAndRetainUrZeEventList (
@@ -920,17 +899,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
920899 else
921900 CommandBuffer->WaitEvent ->WaitList .insert (TmpWaitList);
922901
923- ZE2UR_CALL (zeCommandListAppendBarrier,
924- (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
925- CommandBuffer->WaitEvent ->WaitList .Length ,
926- CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
927- } else {
928- ZE2UR_CALL (zeCommandListAppendSignalEvent,
929- (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ));
902+ if (!CommandBuffer->WaitEvent ->WaitList .isEmpty ()) {
903+ ZE2UR_CALL (zeCommandListAppendBarrier,
904+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
905+ CommandBuffer->WaitEvent ->WaitList .Length ,
906+ CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
907+ Queue->executeCommandList (WaitCommandList, false , false );
908+ MustSignalWaitEvent = false ;
909+ }
910+ }
911+
912+ if (MustSignalWaitEvent) {
913+ ZE2UR_CALL (zeEventHostSignal, (CommandBuffer->WaitEvent ->ZeEvent ));
930914 }
931915
916+ // Submit main command-list. This command-list is of a batch command-list
917+ // type, regardless of the UR Queue type. We therefore need to submit the list
918+ // directly using the Level-Zero API to avoid type mismatches if using UR
919+ // functions.
920+ ZE2UR_CALL (zeCommandQueueExecuteCommandLists,
921+ (ZeCommandQueue, 1 , &CommandBuffer->ZeCommandList , ZeFence));
922+
932923 // Execution event for this enqueue of the UR command-buffer
933924 ur_event_handle_t RetEvent{};
925+
934926 // Create a command-list to signal RetEvent on completion
935927 ur_command_list_ptr_t SignalCommandList{};
936928 UR_CALL (Queue->Context ->getAvailableCommandList (Queue, SignalCommandList,
@@ -943,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
943935 if (Event) {
944936 UR_CALL (createEventAndAssociateQueue (Queue, &RetEvent,
945937 UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
946- SignalCommandList, false ));
938+ SignalCommandList, false , true ));
947939
948940 if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
949941 // Multiple submissions of a command buffer implies that we need to save
@@ -972,13 +964,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
972964 }
973965 }
974966
975- // Execution our command-lists asynchronously
976- // TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
977- // passing all three command-lists, rather than individual calls which
978- // introduces latency.
979- UR_CALL (Queue->executeCommandList (WaitCommandList, false , false ));
980- UR_CALL (Queue->executeCommandList (CommandListPtr, false , false ));
981- UR_CALL (Queue->executeCommandList (SignalCommandList, false , false ));
967+ Queue->executeCommandList (SignalCommandList, false , false );
982968
983969 if (Event) {
984970 *Event = RetEvent;
0 commit comments