Skip to content

Commit 8e40a54

Browse files
committed
CreateCommandBuffer
1 parent c9ac1ba commit 8e40a54

File tree

2 files changed

+92
-106
lines changed

2 files changed

+92
-106
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 79 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,15 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
5858
ze_command_list_handle_t CommandList,
5959
ze_command_list_handle_t CommandListTranslated,
6060
ze_command_list_handle_t CommandListResetEvents,
61-
ze_command_list_handle_t CopyCommandList,
62-
ZeStruct<ze_command_list_desc_t> ZeDesc,
63-
ZeStruct<ze_command_list_desc_t> ZeCopyDesc,
61+
ze_command_list_handle_t CopyCommandList, ur_event_handle_t SignalEvent,
62+
ur_event_handle_t WaitEvent, ur_event_handle_t AllResetEvent,
6463
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList)
6564
: Context(Context), Device(Device), ZeComputeCommandList(CommandList),
6665
ZeComputeCommandListTranslated(CommandListTranslated),
6766
ZeCommandListResetEvents(CommandListResetEvents),
68-
ZeCommandListDesc(ZeDesc), ZeCopyCommandList(CopyCommandList),
69-
ZeCopyCommandListDesc(ZeCopyDesc), ZeFencesMap(), ZeActiveFence(nullptr),
70-
SyncPoints(), NextSyncPoint(0),
67+
ZeCopyCommandList(CopyCommandList), SignalEvent(SignalEvent),
68+
WaitEvent(WaitEvent), AllResetEvent(AllResetEvent), ZeFencesMap(),
69+
ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0),
7170
IsUpdatable(Desc ? Desc->isUpdatable : false),
7271
IsProfilingEnabled(Desc ? Desc->enableProfiling : false),
7372
IsInOrderCmdList(IsInOrderCmdList) {
@@ -540,30 +539,18 @@ static ur_result_t enqueueCommandBufferFillHelper(
540539
return UR_RESULT_SUCCESS;
541540
}
542541

543-
UR_APIEXPORT ur_result_t UR_APICALL
544-
urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
545-
const ur_exp_command_buffer_desc_t *CommandBufferDesc,
546-
ur_exp_command_buffer_handle_t *CommandBuffer) {
547-
// In-order command-lists are not available in old driver version.
548-
bool CompatibleDriver = IsDriverVersionNewerOrSimilar(Context, 1, 3, 28454);
549-
const bool IsInOrder =
550-
CompatibleDriver
551-
? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false)
552-
: false;
542+
static ur_result_t
543+
createMainCommandList(ur_context_handle_t Context, ur_device_handle_t Device,
544+
bool IsInOrder, bool isUpdatable, bool isCopy,
545+
ze_command_list_handle_t &CommandList) {
553546

554-
uint32_t QueueGroupOrdinal =
555-
Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::type::Compute]
556-
.ZeOrdinal;
547+
auto type = isCopy ? ur_device_handle_t_::queue_group_info_t::type::MainCopy
548+
: ur_device_handle_t_::queue_group_info_t::type::Compute;
549+
uint32_t QueueGroupOrdinal = Device->QueueGroup[type].ZeOrdinal;
557550

558551
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
559552
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
560553

561-
ze_command_list_handle_t ZeCommandListResetEvents;
562-
// Create a command-list for reseting the events associated to enqueued cmd.
563-
ZE2UR_CALL(zeCommandListCreate,
564-
(Context->ZeContext, Device->ZeDevice, &ZeCommandListDesc,
565-
&ZeCommandListResetEvents));
566-
567554
// For non-linear graph, dependencies between commands are explicitly enforced
568555
// by sync points when enqueuing. Consequently, relaxing the command ordering in
569556
// the command list can enable the backend to further optimize the workload
@@ -573,41 +560,77 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
573560
DEBUG_LOG(ZeCommandListDesc.flags);
574561

575562
ZeStruct<ze_mutable_command_list_exp_desc_t> ZeMutableCommandListDesc;
576-
if (CommandBufferDesc && CommandBufferDesc->isUpdatable) {
563+
if (isUpdatable) {
577564
ZeMutableCommandListDesc.flags = 0;
578565
ZeCommandListDesc.pNext = &ZeMutableCommandListDesc;
579566
}
580567

581-
ze_command_list_handle_t ZeComputeCommandList;
582-
// TODO We could optimize this by pooling both Level Zero command-lists and UR
583-
// command-buffers, then reusing them.
584568
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
585-
&ZeCommandListDesc, &ZeComputeCommandList));
569+
&ZeCommandListDesc, &CommandList));
586570

587-
// Create a list for copy commands.
588-
// Note that to simplify the implementation, the current implementation only
589-
// uses the main copy engine and does not use the link engine even if
590-
// available.
571+
return UR_RESULT_SUCCESS;
572+
}
573+
574+
static ur_result_t
575+
appendPreconditionEvents(ze_command_list_handle_t CommandList,
576+
ur_event_handle_t WaitEvent,
577+
ur_event_handle_t AllResetEvent) {
578+
std::vector<ze_event_handle_t> PrecondEvents = {WaitEvent->ZeEvent,
579+
AllResetEvent->ZeEvent};
580+
ZE2UR_CALL(
581+
zeCommandListAppendBarrier,
582+
(CommandList, nullptr, PrecondEvents.size(), PrecondEvents.data()));
583+
}
584+
585+
static bool
586+
enableInOrder(ur_context_handle_t Context,
587+
const ur_exp_command_buffer_desc_t *CommandBufferDesc) {
588+
// In-order command-lists are not available in old driver version.
589+
bool CompatibleDriver = IsDriverVersionNewerOrSimilar(Context, 1, 3, 28454);
590+
return CompatibleDriver
591+
? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false)
592+
: false;
593+
}
594+
UR_APIEXPORT ur_result_t UR_APICALL
595+
urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
596+
const ur_exp_command_buffer_desc_t *CommandBufferDesc,
597+
ur_exp_command_buffer_handle_t *CommandBuffer) {
598+
599+
ur_event_handle_t SignalEvent;
600+
ur_event_handle_t WaitEvent;
601+
ur_event_handle_t AllResetEvent;
602+
603+
UR_CALL(EventCreate(Context, nullptr, false, false, &SignalEvent, false,
604+
!CommandBufferDesc->enableProfiling));
605+
UR_CALL(EventCreate(Context, nullptr, false, false, &WaitEvent, false,
606+
!CommandBufferDesc->enableProfiling));
607+
UR_CALL(EventCreate(Context, nullptr, false, false, &AllResetEvent, false,
608+
!CommandBufferDesc->enableProfiling));
609+
610+
bool IsInOrder = enableInOrder(Context, CommandBufferDesc);
611+
bool IsUpdatable = CommandBufferDesc && CommandBufferDesc->isUpdatable;
612+
613+
ze_command_list_handle_t ZeComputeCommandList = nullptr;
614+
UR_CALL(createMainCommandList(Context, Device, IsInOrder, IsUpdatable, false,
615+
ZeComputeCommandList));
616+
UR_CALL(
617+
appendPreconditionEvents(ZeComputeCommandList, WaitEvent, AllResetEvent));
618+
619+
ze_command_list_handle_t ZeCommandListResetEvents = nullptr;
620+
UR_CALL(createMainCommandList(Context, Device, false, false, false,
621+
ZeCommandListResetEvents));
622+
ZE2UR_CALL(zeCommandListAppendEventReset,
623+
(ZeCommandListResetEvents, SignalEvent->ZeEvent));
624+
625+
// Create a list for copy commands. Note that to simplify the implementation,
626+
// the current implementation only uses the main copy engine and does not use
627+
// the link engine even if available.
591628
ze_command_list_handle_t ZeCopyCommandList = nullptr;
592-
ZeStruct<ze_command_list_desc_t> ZeCopyCommandListDesc;
593629
if (Device->hasMainCopyEngine()) {
594-
uint32_t QueueGroupOrdinalCopy =
595-
Device
596-
->QueueGroup
597-
[ur_device_handle_t_::queue_group_info_t::type::MainCopy]
598-
.ZeOrdinal;
599-
600-
ZeCopyCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinalCopy;
601-
// Dependencies between commands are explicitly enforced by sync points when
602-
// enqueuing. Consequently, relax the command ordering in the command list
603-
// can enable the backend to further optimize the workload
604-
ZeCopyCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING;
605-
606-
// TODO We could optimize this by pooling both Level Zero command-lists and
607-
// UR command-buffers, then reusing them.
608-
ZE2UR_CALL(zeCommandListCreate,
609-
(Context->ZeContext, Device->ZeDevice, &ZeCopyCommandListDesc,
610-
&ZeCopyCommandList));
630+
UR_CALL(createMainCommandList(Context, Device, false, false, true,
631+
ZeCopyCommandList));
632+
UR_CALL(
633+
appendPreconditionEvents(ZeCopyCommandList, WaitEvent, AllResetEvent));
611634
}
612635

613636
ze_command_list_handle_t ZeComputeCommandListTranslated = nullptr;
@@ -618,46 +641,14 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
618641
try {
619642
*CommandBuffer = new ur_exp_command_buffer_handle_t_(
620643
Context, Device, ZeComputeCommandList, ZeComputeCommandListTranslated,
621-
ZeCommandListResetEvents, ZeCopyCommandList, ZeCommandListDesc,
622-
ZeCopyCommandListDesc, CommandBufferDesc, IsInOrder);
644+
ZeCommandListResetEvents, ZeCopyCommandList, SignalEvent, WaitEvent,
645+
AllResetEvent, CommandBufferDesc, IsInOrder);
623646
} catch (const std::bad_alloc &) {
624647
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
625648
} catch (...) {
626649
return UR_RESULT_ERROR_UNKNOWN;
627650
}
628651

629-
// Create signal & wait events to be used in the command-list for sync
630-
// on command-buffer enqueue.
631-
auto RetCommandBuffer = *CommandBuffer;
632-
UR_CALL(EventCreate(Context, nullptr, false, false,
633-
&RetCommandBuffer->SignalEvent, false,
634-
!RetCommandBuffer->IsProfilingEnabled));
635-
UR_CALL(EventCreate(Context, nullptr, false, false,
636-
&RetCommandBuffer->WaitEvent, false,
637-
!RetCommandBuffer->IsProfilingEnabled));
638-
UR_CALL(EventCreate(Context, nullptr, false, false,
639-
&RetCommandBuffer->AllResetEvent, false,
640-
!RetCommandBuffer->IsProfilingEnabled));
641-
642-
// Add prefix commands
643-
ZE2UR_CALL(
644-
zeCommandListAppendEventReset,
645-
(ZeCommandListResetEvents, RetCommandBuffer->SignalEvent->ZeEvent));
646-
std::vector<ze_event_handle_t> PrecondEvents = {
647-
RetCommandBuffer->WaitEvent->ZeEvent,
648-
RetCommandBuffer->AllResetEvent->ZeEvent};
649-
ZE2UR_CALL(zeCommandListAppendBarrier,
650-
(ZeComputeCommandList, nullptr, PrecondEvents.size(),
651-
PrecondEvents.data()));
652-
653-
if (Device->hasMainCopyEngine()) {
654-
// The copy command-list must be executed once the preconditions have been
655-
// met. We therefore begin this command-list with a barrier on the
656-
// preconditions.
657-
ZE2UR_CALL(zeCommandListAppendBarrier,
658-
(ZeCopyCommandList, nullptr, PrecondEvents.size(),
659-
PrecondEvents.data()));
660-
}
661652
return UR_RESULT_SUCCESS;
662653
}
663654

@@ -1164,6 +1155,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFence(
11641155
ZeFence = ZeWorkloadFenceForQueue->second;
11651156
ZE2UR_CALL(zeFenceReset, (ZeFence));
11661157
}
1158+
this->ZeActiveFence = ZeFence;
11671159
return UR_RESULT_SUCCESS;
11681160
}
11691161

@@ -1273,7 +1265,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
12731265
auto Queue = Legacy(UrQueue);
12741266
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
12751267

1276-
const auto UseCopyEngine = false;
12771268
ze_command_queue_handle_t ZeCommandQueue;
12781269
CommandBuffer->getZeCommandQueue(Queue, false, ZeCommandQueue);
12791270

@@ -1309,10 +1300,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
13091300
(ZeCopyCommandQueue, 1, &CommandBuffer->ZeCopyCommandList, nullptr));
13101301
}
13111302

1312-
// Execution event for this enqueue of the UR command-buffer
1313-
ur_event_handle_t RetEvent{};
1314-
1315-
// Create a command-list to signal RetEvent on completion
1303+
// Create a command-list to signal the Event on completion
13161304
ur_command_list_ptr_t SignalCommandList{};
13171305
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
13181306
false, NumEventsInWaitList,

source/adapters/level_zero/command_buffer.hpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
3131
ze_command_list_handle_t CommandListTranslated,
3232
ze_command_list_handle_t CommandListResetEvents,
3333
ze_command_list_handle_t CopyCommandList,
34-
ZeStruct<ze_command_list_desc_t> ZeDesc,
35-
ZeStruct<ze_command_list_desc_t> ZeCopyDesc,
34+
ur_event_handle_t SignalEvent,
35+
ur_event_handle_t WaitEvent,
36+
ur_event_handle_t AllResetEvent,
3637
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList);
3738

3839
~ur_exp_command_buffer_handle_t_();
@@ -70,12 +71,17 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
7071
ze_command_list_handle_t ZeComputeCommandListTranslated;
7172
// Level Zero command list handle
7273
ze_command_list_handle_t ZeCommandListResetEvents;
73-
// Level Zero command list descriptor
74-
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
7574
// Level Zero Copy command list handle
7675
ze_command_list_handle_t ZeCopyCommandList;
77-
// Level Zero Copy command list descriptor
78-
ZeStruct<ze_command_list_desc_t> ZeCopyCommandListDesc;
76+
// Event which signals that the most recent execution of the command-buffer
77+
// has finished
78+
ur_event_handle_t SignalEvent = nullptr;
79+
// Event which a command-buffer waits on until the wait-list dependencies
80+
// passed to a command-buffer enqueue have been satisfied.
81+
ur_event_handle_t WaitEvent = nullptr;
82+
// Event which a command-buffer waits on until the main command-list events
83+
// have been reset.
84+
ur_event_handle_t AllResetEvent = nullptr;
7985
// This flag must be set to false if at least one copy command has been
8086
// added to `ZeCopyCommandList`
8187
bool MCopyCommandListEmpty = true;
@@ -94,15 +100,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
94100
ur_exp_command_buffer_sync_point_t NextSyncPoint;
95101
// List of Level Zero events associated to submitted commands.
96102
std::vector<ze_event_handle_t> ZeEventsList;
97-
// Event which will signals the most recent execution of the command-buffer
98-
// has finished
99-
ur_event_handle_t SignalEvent = nullptr;
100-
// Event which a command-buffer waits on until the wait-list dependencies
101-
// passed to a command-buffer enqueue have been satisfied.
102-
ur_event_handle_t WaitEvent = nullptr;
103-
// Event which a command-buffer waits on until the main command-list event
104-
// have been reset.
105-
ur_event_handle_t AllResetEvent = nullptr;
103+
106104
// Indicates if command-buffer commands can be updated after it is closed.
107105
bool IsUpdatable = false;
108106
// Indicates if command buffer was finalized.

0 commit comments

Comments
 (0)