Skip to content

Commit 981786b

Browse files
author
Fabio Mestre
committed
Address review comments
1 parent 9ac195e commit 981786b

File tree

4 files changed

+57
-37
lines changed

4 files changed

+57
-37
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@ namespace {
2626
// given Context and Device.
2727
bool checkImmediateAppendSupport(ur_context_handle_t Context,
2828
ur_device_handle_t Device) {
29-
/* Minimum driver version that support
30-
* zeCommandListImmediateAppendCommandListsExp */
29+
// TODO The L0 driver is not reporting this extension yet. Once it does,
30+
// switch to using the variable zeDriverImmediateCommandListAppendFound.
31+
32+
// Minimum version that supports zeCommandListImmediateAppendCommandListsExp.
3133
constexpr uint32_t MinDriverVersion = 30898;
3234
bool DriverSupportsImmediateAppend =
3335
Context->getPlatform()->isDriverVersionNewerOrSimilar(1, 3,
@@ -320,7 +322,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
320322
ur_event_handle_t ExecutionFinishedEvent, ur_event_handle_t WaitEvent,
321323
ur_event_handle_t AllResetEvent, ur_event_handle_t CopyFinishedEvent,
322324
ur_event_handle_t ComputeFinishedEvent,
323-
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList)
325+
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList, const bool UseImmediateAppendPath)
324326
: Context(Context), Device(Device), ZeComputeCommandList(CommandList),
325327
ZeComputeCommandListTranslated(CommandListTranslated),
326328
ZeCommandListResetEvents(CommandListResetEvents),
@@ -331,7 +333,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
331333
ZeActiveFence(nullptr), SyncPoints(), NextSyncPoint(0),
332334
IsUpdatable(Desc ? Desc->isUpdatable : false),
333335
IsProfilingEnabled(Desc ? Desc->enableProfiling : false),
334-
IsInOrderCmdList(IsInOrderCmdList) {
336+
IsInOrderCmdList(IsInOrderCmdList), UseImmediateAppendPath(UseImmediateAppendPath) {
335337
ur::level_zero::urContextRetain(Context);
336338
ur::level_zero::urDeviceRetain(Device);
337339
}
@@ -358,7 +360,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
358360
ZE_CALL_NOCHECK(zeCommandListDestroy, (ZeCommandListResetEvents));
359361
}
360362

361-
// Release additional signal and wait events used by command_buffer
363+
// Release additional events used by the command_buffer.
362364
if (ExecutionFinishedEvent) {
363365
CleanupCompletedEvent(ExecutionFinishedEvent, false);
364366
urEventReleaseInternal(ExecutionFinishedEvent);
@@ -373,12 +375,14 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
373375
}
374376

375377
if (CopyFinishedEvent) {
376-
CleanupCompletedEvent(CopyFinishedEvent, false);
378+
CleanupCompletedEvent(CopyFinishedEvent, false /*QueueLocked*/,
379+
false /*SetEventCompleted*/);
377380
urEventReleaseInternal(CopyFinishedEvent);
378381
}
379382

380383
if (ComputeFinishedEvent) {
381-
CleanupCompletedEvent(ComputeFinishedEvent, false);
384+
CleanupCompletedEvent(ComputeFinishedEvent, false /*QueueLocked*/,
385+
false /*SetEventCompleted*/);
382386
urEventReleaseInternal(ComputeFinishedEvent);
383387
}
384388

@@ -544,7 +548,7 @@ bool canBeInOrder(ur_context_handle_t Context,
544548
}
545549

546550
/**
547-
* Append the initials barriers to the Compute and Copy command-lists.
551+
* Append the initial barriers to the Compute and Copy command-lists.
548552
* These barriers wait for all the events to be reset before starting execution
549553
* of the command-buffer
550554
* @param CommandBuffer The CommandBuffer
@@ -576,7 +580,7 @@ ur_result_t appendExecutionWaits(ur_exp_command_buffer_handle_t CommandBuffer,
576580
(CommandBuffer->ZeComputeCommandList, nullptr,
577581
PrecondEvents.size(), PrecondEvents.data()));
578582

579-
if (CommandBuffer->Device->hasMainCopyEngine()) {
583+
if (CommandBuffer->ZeCopyCommandList) {
580584
ZE2UR_CALL(zeCommandListAppendBarrier,
581585
(CommandBuffer->ZeCopyCommandList, nullptr,
582586
PrecondEvents.size(), PrecondEvents.data()));
@@ -619,16 +623,18 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
619623
}
620624

621625
if (EnableProfiling) {
622-
UR_CALL(EventCreate(Context, nullptr, false, false, &ComputeFinishedEvent,
626+
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
627+
false /*HostVisible*/, &ComputeFinishedEvent,
623628
UseCounterBasedEvents, !EnableProfiling));
624629
}
625630
}
626631

627632
// The WaitEvent is needed only when using WaitEvent Path.
628633
ur_event_handle_t WaitEvent = nullptr;
629634
if (WaitEventPath) {
630-
UR_CALL(EventCreate(Context, nullptr, false, false, &WaitEvent, false,
631-
!EnableProfiling));
635+
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
636+
false /*HostVisible*/, &WaitEvent,
637+
false /*CounterBasedEventEnabled*/, !EnableProfiling));
632638
}
633639

634640
// Create ZeCommandListResetEvents only if counter-based events are not being
@@ -638,15 +644,17 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
638644
ur_event_handle_t AllResetEvent = nullptr;
639645
ur_event_handle_t ExecutionFinishedEvent = nullptr;
640646
if (!UseCounterBasedEvents) {
641-
UR_CALL(EventCreate(Context, nullptr, false, false, &AllResetEvent, false,
642-
!EnableProfiling));
647+
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
648+
false /*HostVisible*/, &AllResetEvent,
649+
false /*CounterBasedEventEnabled*/, !EnableProfiling));
643650

644651
UR_CALL(createMainCommandList(Context, Device, false, false, false,
645652
ZeCommandListResetEvents));
646653

647654
// The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents.
648-
UR_CALL(EventCreate(Context, nullptr, false, false, &ExecutionFinishedEvent,
649-
false, !EnableProfiling));
655+
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
656+
false /*HostVisible*/, &ExecutionFinishedEvent, false,
657+
!EnableProfiling));
650658
}
651659

652660
UR_CALL(createMainCommandList(Context, Device, IsInOrder, IsUpdatable, false,
@@ -670,7 +678,7 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
670678
Context, Device, ZeComputeCommandList, ZeComputeCommandListTranslated,
671679
ZeCommandListResetEvents, ZeCopyCommandList, ExecutionFinishedEvent,
672680
WaitEvent, AllResetEvent, CopyFinishedEvent, ComputeFinishedEvent,
673-
CommandBufferDesc, IsInOrder);
681+
CommandBufferDesc, IsInOrder, ImmediateAppendPath);
674682
} catch (const std::bad_alloc &) {
675683
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
676684
} catch (...) {
@@ -809,11 +817,10 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
809817
// It is not allowed to append to command list from multiple threads.
810818
std::scoped_lock<ur_shared_mutex> Guard(CommandBuffer->Mutex);
811819

812-
if (checkImmediateAppendSupport(CommandBuffer->Context,
813-
CommandBuffer->Device)) {
814-
finalizeImmediateAppendPath(CommandBuffer);
820+
if (CommandBuffer->UseImmediateAppendPath) {
821+
UR_CALL(finalizeImmediateAppendPath(CommandBuffer));
815822
} else {
816-
finalizeWaitEventPath(CommandBuffer);
823+
UR_CALL(finalizeWaitEventPath(CommandBuffer));
817824
}
818825

819826
// Close the command lists and have them ready for dispatch.
@@ -875,7 +882,7 @@ ur_result_t
875882
createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,
876883
ur_kernel_handle_t Kernel, uint32_t WorkDim,
877884
const size_t *LocalWorkSize,
878-
ur_exp_command_buffer_command_handle_t *Command) {
885+
ur_exp_command_buffer_command_handle_t &Command) {
879886

880887
assert(CommandBuffer->IsUpdatable);
881888

@@ -897,7 +904,7 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,
897904
DEBUG_LOG(CommandId);
898905

899906
try {
900-
*Command = new ur_exp_command_buffer_command_handle_t_(
907+
Command = new ur_exp_command_buffer_command_handle_t_(
901908
CommandBuffer, CommandId, WorkDim, LocalWorkSize != nullptr, Kernel);
902909
} catch (const std::bad_alloc &) {
903910
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
@@ -960,7 +967,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
960967

961968
if (Command) {
962969
UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize,
963-
Command));
970+
*Command));
964971
}
965972

966973
std::vector<ze_event_handle_t> ZeEventList;
@@ -1424,7 +1431,7 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
14241431
* profiling.
14251432
* @return UR_RESULT_SUCCESS or an error code on failure.
14261433
*/
1427-
ur_result_t doProfiling(ur_exp_command_buffer_handle_t CommandBuffer,
1434+
ur_result_t appendProfilingQueries(ur_exp_command_buffer_handle_t CommandBuffer,
14281435
ze_command_list_handle_t CommandList,
14291436
ur_event_handle_t SignalEvent,
14301437
ur_event_handle_t WaitEvent) {
@@ -1471,8 +1478,8 @@ ur_result_t enqueueImmediateAppendPath(
14711478
if (!CommandBuffer->MCopyCommandListEmpty) {
14721479
ur_command_list_ptr_t ZeCopyEngineImmediateListHelper{};
14731480
UR_CALL(Queue->Context->getAvailableCommandList(
1474-
Queue, ZeCopyEngineImmediateListHelper, true, NumEventsInWaitList,
1475-
EventWaitList, false));
1481+
Queue, ZeCopyEngineImmediateListHelper, true /*UseCopyEngine*/, NumEventsInWaitList,
1482+
EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/));
14761483
assert(ZeCopyEngineImmediateListHelper->second.IsImmediate);
14771484

14781485
ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp,
@@ -1492,7 +1499,7 @@ ur_result_t enqueueImmediateAppendPath(
14921499
EventToSignal, WaitList.Length, WaitList.ZeEventList));
14931500

14941501
if (DoProfiling) {
1495-
UR_CALL(doProfiling(CommandBuffer, CommandListHelper->first, *Event,
1502+
UR_CALL(appendProfilingQueries(CommandBuffer, CommandListHelper->first, *Event,
14961503
CommandBuffer->ComputeFinishedEvent));
14971504
}
14981505

@@ -1580,7 +1587,7 @@ ur_result_t enqueueWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer,
15801587
(SignalCommandList->first, CommandBuffer->AllResetEvent->ZeEvent));
15811588

15821589
if (DoProfiling) {
1583-
UR_CALL(doProfiling(CommandBuffer, SignalCommandList->first, *Event,
1590+
UR_CALL(appendProfilingQueries(CommandBuffer, SignalCommandList->first, *Event,
15841591
CommandBuffer->ExecutionFinishedEvent));
15851592
} else {
15861593
ZE2UR_CALL(zeCommandListAppendBarrier,
@@ -1610,15 +1617,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
16101617

16111618
ur_command_list_ptr_t ZeCommandListHelper{};
16121619
UR_CALL(UrQueue->Context->getAvailableCommandList(
1613-
UrQueue, ZeCommandListHelper, false, NumEventsInWaitList, EventWaitList,
1614-
false));
1620+
UrQueue, ZeCommandListHelper, false /*UseCopyEngine*/,
1621+
NumEventsInWaitList, EventWaitList, false /*AllowBatching*/,
1622+
nullptr /*ForcedCmdQueue*/));
16151623

16161624
UR_CALL(createEventAndAssociateQueue(
16171625
UrQueue, OutEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
16181626
ZeCommandListHelper, IsInternal, false, std::nullopt));
16191627

1620-
if (checkImmediateAppendSupport(CommandBuffer->Context,
1621-
CommandBuffer->Device)) {
1628+
if (CommandBuffer->UseImmediateAppendPath) {
16221629
UR_CALL(enqueueImmediateAppendPath(
16231630
CommandBuffer, UrQueue, NumEventsInWaitList, EventWaitList, OutEvent,
16241631
ZeCommandListHelper, DoProfiling));

source/adapters/level_zero/command_buffer.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
3535
ur_event_handle_t ExecutionFinishedEvent, ur_event_handle_t WaitEvent,
3636
ur_event_handle_t AllResetEvent, ur_event_handle_t CopyFinishedEvent,
3737
ur_event_handle_t ComputeFinishedEvent,
38-
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList);
38+
const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList, const bool UseImmediateAppendPath);
3939

4040
void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
4141
ur_event_handle_t Event);
@@ -74,7 +74,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
7474
ur_context_handle_t Context;
7575
// Device associated with this command buffer
7676
ur_device_handle_t Device;
77-
// Level Zero command list handle that has the compute engine command for this
77+
// Level Zero command list handle that has the compute engine commands for this
7878
// command-buffer.
7979
ze_command_list_handle_t ZeComputeCommandList;
8080
// Given a multi driver scenario, the driver handle must be translated to the
@@ -83,7 +83,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
8383
// Level Zero command list handle that is responsible for resetting
8484
// the events after the compute and copy command-lists execute.
8585
ze_command_list_handle_t ZeCommandListResetEvents;
86-
// Level Zero command list handle that has the copy engine command for this
86+
// Level Zero command list handle that has the copy engine commands for this
8787
// command-buffer.
8888
ze_command_list_handle_t ZeCopyCommandList;
8989
// Event which will signal the most recent execution of the command-buffer
@@ -134,6 +134,9 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
134134
bool IsProfilingEnabled = false;
135135
// Command-buffer can be submitted to an in-order command-list.
136136
bool IsInOrderCmdList = false;
137+
// Whether this command-buffer should use the code path that uses
138+
// zeCommandListImmediateAppendCommandListsExp during enqueue.
139+
bool UseImmediateAppendPath = false;
137140
// This list is needed to release all kernels retained by the
138141
// command_buffer.
139142
std::vector<ur_kernel_handle_t> KernelsList;
@@ -155,4 +158,4 @@ struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {
155158
// Set to true if the user set the local work size on command creation.
156159
bool UserDefinedLocalSize;
157160
ur_kernel_handle_t Kernel;
158-
};
161+
};

source/adapters/level_zero/platform.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,15 @@ ur_result_t ur_platform_handle_t_::initialize() {
239239
ZeDriverEventPoolCountingEventsExtensionFound = true;
240240
}
241241
}
242+
243+
// Check if the ImmediateAppendCommandLists extension is available.
244+
if (strncmp(extension.name, ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME,
245+
strlen(ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME) + 1) == 0) {
246+
if (extension.version ==
247+
ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_CURRENT) {
248+
zeDriverImmediateCommandListAppendFound = true;
249+
}
250+
}
242251
zeDriverExtensionMap[extension.name] = extension.version;
243252
}
244253

source/adapters/level_zero/platform.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct ur_platform_handle_t_ : public _ur_platform {
5959
bool ZeDriverGlobalOffsetExtensionFound{false};
6060
bool ZeDriverModuleProgramExtensionFound{false};
6161
bool ZeDriverEventPoolCountingEventsExtensionFound{false};
62+
bool zeDriverImmediateCommandListAppendFound{false};
6263

6364
// Cache UR devices for reuse
6465
std::vector<std::unique_ptr<ur_device_handle_t_>> URDevicesCache;

0 commit comments

Comments
 (0)