Skip to content

Commit 3933753

Browse files
[EXP][CMDBUF] L0 Immediate command-list support
Adds support for Level Zero (L0) immediate command-lists. The command-list containing the graph operations is still a batch command-list, but graphs can now be submitted even when the user requests an immediate queue. The additional prefix and suffix command-lists follow the type of the queue.
1 parent 67e4d1b commit 3933753

File tree

2 files changed

+35
-57
lines changed

2 files changed

+35
-57
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 31 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
9393
const ur_exp_command_buffer_desc_t *Desc)
9494
: Context(Context), Device(Device), ZeCommandList(CommandList),
9595
ZeCommandListDesc(ZeDesc), QueueProperties(), SyncPoints(),
96-
NextSyncPoint(0), CommandListMap() {
96+
NextSyncPoint(0), ZeFencesList() {
9797
(void)Desc;
9898
urContextRetain(Context);
9999
urDeviceRetain(Device);
@@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
132132
}
133133

134134
// Release Fences allocated to command_buffer
135-
for (auto it = CommandListMap.begin(); it != CommandListMap.end(); ++it) {
136-
if (it->second.ZeFence != nullptr) {
137-
ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
138-
}
135+
for (auto &ZeFence : ZeFencesList) {
136+
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
139137
}
140138
}
141139

@@ -418,7 +416,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
418416
ZE2UR_CALL(
419417
zeCommandListAppendBarrier,
420418
(ZeCommandList, nullptr, 1, &RetCommandBuffer->WaitEvent->ZeEvent));
421-
422419
return UR_RESULT_SUCCESS;
423420
}
424421

@@ -687,12 +684,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
687684
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
688685
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
689686
ur_event_handle_t *Event) {
690-
// There are issues with immediate command lists so return an error if the
691-
// queue is in that mode.
692-
if (Queue->UsingImmCmdLists) {
693-
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
694-
}
695-
696687
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
697688
// Use compute engine rather than copy engine
698689
const auto UseCopyEngine = false;
@@ -702,78 +693,68 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
702693

703694
ze_fence_handle_t ZeFence;
704695
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
705-
ur_command_list_ptr_t CommandListPtr;
706696

707697
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
708-
// TODO: Refactor so requiring a map iterator is not required here, currently
709-
// required for executeCommandList though.
710-
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
711-
ZeQueueDesc.ordinal = QueueGroupOrdinal;
712-
CommandListPtr = CommandBuffer->CommandListMap.insert(
713-
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
714-
CommandBuffer->ZeCommandList,
715-
{ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
716-
717-
// Previous execution will have closed the command list, we need to reopen
718-
// it otherwise calling `executeCommandList` will return early.
719-
CommandListPtr->second.IsClosed = false;
720-
CommandListPtr->second.ZeFenceInUse = true;
698+
CommandBuffer->ZeFencesList.push_back(ZeFence);
721699

722700
// Create a command-list that executes before the main command-list and signals
723701
// `WaitEvent` once the `EventWaitList` dependencies are complete.
724-
ur_command_list_ptr_t WaitCommandList{};
702+
bool MustSignalWaitEvent = true;
725703
if (NumEventsInWaitList) {
726704
_ur_ze_event_list_t TmpWaitList;
727705
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
728706
NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
729707

730-
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
731-
false, false))
732-
733708
// Update the WaitList of the Wait Event
734709
// Events are appended to the WaitList if the WaitList is not empty
735710
if (CommandBuffer->WaitEvent->WaitList.isEmpty())
736711
CommandBuffer->WaitEvent->WaitList = TmpWaitList;
737712
else
738713
CommandBuffer->WaitEvent->WaitList.insert(TmpWaitList);
739714

740-
ZE2UR_CALL(zeCommandListAppendBarrier,
741-
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
742-
CommandBuffer->WaitEvent->WaitList.Length,
743-
CommandBuffer->WaitEvent->WaitList.ZeEventList));
744-
} else {
745-
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
746-
false, false));
715+
if (!CommandBuffer->WaitEvent->WaitList.isEmpty()) {
716+
ur_command_list_ptr_t WaitCommandList{};
717+
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
718+
false, false))
719+
720+
ZE2UR_CALL(zeCommandListAppendBarrier,
721+
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
722+
CommandBuffer->WaitEvent->WaitList.Length,
723+
CommandBuffer->WaitEvent->WaitList.ZeEventList));
724+
Queue->executeCommandList(WaitCommandList, false, false);
725+
MustSignalWaitEvent = false;
726+
}
727+
}
747728

748-
ZE2UR_CALL(zeCommandListAppendSignalEvent,
749-
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
729+
if (MustSignalWaitEvent) {
730+
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
750731
}
751732

733+
// Submit main command-list. This command-list is of a batch command-list
734+
// type, regardless of the UR Queue type. We therefore need to submit the list
735+
// directly using the Level-Zero API to avoid type mismatches if using UR
736+
// functions.
737+
ZE2UR_CALL(zeCommandQueueExecuteCommandLists,
738+
(ZeCommandQueue, 1, &CommandBuffer->ZeCommandList, ZeFence));
739+
752740
// Execution event for this enqueue of the UR command-buffer
753741
ur_event_handle_t RetEvent{};
754-
// Create a command-list to signal RetEvent on completion
755-
ur_command_list_ptr_t SignalCommandList{};
756742
if (Event) {
743+
// Create a command-list to signal RetEvent on completion
744+
ur_command_list_ptr_t SignalCommandList{};
757745
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
758746
false, false));
759747

760748
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
761749
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
762-
SignalCommandList, false));
750+
SignalCommandList, false, true));
763751

764752
ZE2UR_CALL(zeCommandListAppendBarrier,
765753
(SignalCommandList->first, RetEvent->ZeEvent, 1,
766754
&(CommandBuffer->SignalEvent->ZeEvent)));
755+
Queue->executeCommandList(SignalCommandList, false, false);
767756
}
768757

769-
// Execution our command-lists asynchronously
770-
// TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
771-
// passing all three command-lists, rather than individual calls which
772-
// introduces latency.
773-
UR_CALL(Queue->executeCommandList(WaitCommandList, false, false));
774-
UR_CALL(Queue->executeCommandList(CommandListPtr, false, false));
775-
UR_CALL(Queue->executeCommandList(SignalCommandList, false, false));
776-
777758
if (Event) {
778759
*Event = RetEvent;
779760
}

source/adapters/level_zero/command_buffer.hpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
4646
ze_command_list_handle_t ZeCommandList;
4747
// Level Zero command list descriptor
4848
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
49+
// List of Level Zero fences created when submitting a graph.
50+
// This list is needed to release all fences retained by the
51+
// command_buffer.
52+
std::vector<ze_fence_handle_t> ZeFencesList;
4953
// Queue properties from command-buffer descriptor
5054
// TODO: Do we need these?
5155
ur_queue_properties_t QueueProperties;
@@ -55,13 +59,6 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
5559
// Next sync_point value (may need to consider ways to reuse values if 32-bits
5660
// is not enough)
5761
ur_exp_command_buffer_sync_point_t NextSyncPoint;
58-
// Command list map so we can use queue::executeCommandList.
59-
// Command list map is also used to release all the Fences retained by the
60-
// command_buffer std::unordered_multimap<ze_command_list_handle_t,
61-
// ur_command_list_info_t> CommandListMap; CommandListMap is redefined as a
62-
// multimap to enable multiple commands enqueuing into the same command_buffer
63-
std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>
64-
CommandListMap;
6562
// Event which signals that the most recent execution of the command-buffer
6663
// has finished
6764
ur_event_handle_t SignalEvent = nullptr;

0 commit comments

Comments
 (0)