@@ -26,8 +26,10 @@ namespace {
2626// given Context and Device.
2727bool checkImmediateAppendSupport (ur_context_handle_t Context,
2828 ur_device_handle_t Device) {
29- /* Minimum driver version that support
30- * zeCommandListImmediateAppendCommandListsExp */
29+ // TODO The L0 driver is not reporting this extension yet. Once it does,
30+ // switch to using the variable zeDriverImmediateCommandListAppendFound.
31+
32+ // Minimum version that supports zeCommandListImmediateAppendCommandListsExp.
3133 constexpr uint32_t MinDriverVersion = 30898 ;
3234 bool DriverSupportsImmediateAppend =
3335 Context->getPlatform ()->isDriverVersionNewerOrSimilar (1 , 3 ,
@@ -320,7 +322,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
320322 ur_event_handle_t ExecutionFinishedEvent, ur_event_handle_t WaitEvent,
321323 ur_event_handle_t AllResetEvent, ur_event_handle_t CopyFinishedEvent,
322324 ur_event_handle_t ComputeFinishedEvent,
323- const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList)
325+ const ur_exp_command_buffer_desc_t *Desc, const bool IsInOrderCmdList, const bool UseImmediateAppendPath )
324326 : Context(Context), Device(Device), ZeComputeCommandList(CommandList),
325327 ZeComputeCommandListTranslated(CommandListTranslated),
326328 ZeCommandListResetEvents(CommandListResetEvents),
@@ -331,7 +333,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
331333 ZeActiveFence(nullptr ), SyncPoints(), NextSyncPoint(0 ),
332334 IsUpdatable(Desc ? Desc->isUpdatable : false ),
333335 IsProfilingEnabled(Desc ? Desc->enableProfiling : false ),
334- IsInOrderCmdList(IsInOrderCmdList) {
336+ IsInOrderCmdList(IsInOrderCmdList), UseImmediateAppendPath(UseImmediateAppendPath) {
335337 ur::level_zero::urContextRetain (Context);
336338 ur::level_zero::urDeviceRetain (Device);
337339}
@@ -358,7 +360,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
358360 ZE_CALL_NOCHECK (zeCommandListDestroy, (ZeCommandListResetEvents));
359361 }
360362
361- // Release additional signal and wait events used by command_buffer
363+ // Release additional events used by the command_buffer.
362364 if (ExecutionFinishedEvent) {
363365 CleanupCompletedEvent (ExecutionFinishedEvent, false );
364366 urEventReleaseInternal (ExecutionFinishedEvent);
@@ -373,12 +375,14 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() {
373375 }
374376
375377 if (CopyFinishedEvent) {
376- CleanupCompletedEvent (CopyFinishedEvent, false );
378+ CleanupCompletedEvent (CopyFinishedEvent, false /* QueueLocked*/ ,
379+ false /* SetEventCompleted*/ );
377380 urEventReleaseInternal (CopyFinishedEvent);
378381 }
379382
380383 if (ComputeFinishedEvent) {
381- CleanupCompletedEvent (ComputeFinishedEvent, false );
384+ CleanupCompletedEvent (ComputeFinishedEvent, false /* QueueLocked*/ ,
385+ false /* SetEventCompleted*/ );
382386 urEventReleaseInternal (ComputeFinishedEvent);
383387 }
384388
@@ -544,7 +548,7 @@ bool canBeInOrder(ur_context_handle_t Context,
544548}
545549
546550/* *
547- * Append the initials barriers to the Compute and Copy command-lists.
551+ * Append the initial barriers to the Compute and Copy command-lists.
548552 * These barriers wait for all the events to be reset before starting execution
549553 * of the command-buffer.
550554 * @param CommandBuffer The CommandBuffer
@@ -576,7 +580,7 @@ ur_result_t appendExecutionWaits(ur_exp_command_buffer_handle_t CommandBuffer,
576580 (CommandBuffer->ZeComputeCommandList , nullptr ,
577581 PrecondEvents.size (), PrecondEvents.data ()));
578582
579- if (CommandBuffer->Device -> hasMainCopyEngine () ) {
583+ if (CommandBuffer->ZeCopyCommandList ) {
580584 ZE2UR_CALL (zeCommandListAppendBarrier,
581585 (CommandBuffer->ZeCopyCommandList , nullptr ,
582586 PrecondEvents.size (), PrecondEvents.data ()));
@@ -619,16 +623,18 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
619623 }
620624
621625 if (EnableProfiling) {
622- UR_CALL (EventCreate (Context, nullptr , false , false , &ComputeFinishedEvent,
626+ UR_CALL (EventCreate (Context, nullptr /* Queue*/ , false /* IsMultiDevice*/ ,
627+ false /* HostVisible*/ , &ComputeFinishedEvent,
623628 UseCounterBasedEvents, !EnableProfiling));
624629 }
625630 }
626631
627632 // The WaitEvent is needed only when using WaitEvent Path.
628633 ur_event_handle_t WaitEvent = nullptr ;
629634 if (WaitEventPath) {
630- UR_CALL (EventCreate (Context, nullptr , false , false , &WaitEvent, false ,
631- !EnableProfiling));
635+ UR_CALL (EventCreate (Context, nullptr /* Queue*/ , false /* IsMultiDevice*/ ,
636+ false /* HostVisible*/ , &WaitEvent,
637+ false /* CounterBasedEventEnabled*/ , !EnableProfiling));
632638 }
633639
634640 // Create ZeCommandListResetEvents only if counter-based events are not being
@@ -638,15 +644,17 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
638644 ur_event_handle_t AllResetEvent = nullptr ;
639645 ur_event_handle_t ExecutionFinishedEvent = nullptr ;
640646 if (!UseCounterBasedEvents) {
641- UR_CALL (EventCreate (Context, nullptr , false , false , &AllResetEvent, false ,
642- !EnableProfiling));
647+ UR_CALL (EventCreate (Context, nullptr /* Queue*/ , false /* IsMultiDevice*/ ,
648+ false /* HostVisible*/ , &AllResetEvent,
649+ false /* CounterBasedEventEnabled*/ , !EnableProfiling));
643650
644651 UR_CALL (createMainCommandList (Context, Device, false , false , false ,
645652 ZeCommandListResetEvents));
646653
647654 // The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents.
648- UR_CALL (EventCreate (Context, nullptr , false , false , &ExecutionFinishedEvent,
649- false , !EnableProfiling));
655+ UR_CALL (EventCreate (Context, nullptr /* Queue*/ , false /* IsMultiDevice*/ ,
656+ false /* HostVisible*/ , &ExecutionFinishedEvent, false ,
657+ !EnableProfiling));
650658 }
651659
652660 UR_CALL (createMainCommandList (Context, Device, IsInOrder, IsUpdatable, false ,
@@ -670,7 +678,7 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
670678 Context, Device, ZeComputeCommandList, ZeComputeCommandListTranslated,
671679 ZeCommandListResetEvents, ZeCopyCommandList, ExecutionFinishedEvent,
672680 WaitEvent, AllResetEvent, CopyFinishedEvent, ComputeFinishedEvent,
673- CommandBufferDesc, IsInOrder);
681+ CommandBufferDesc, IsInOrder, ImmediateAppendPath );
674682 } catch (const std::bad_alloc &) {
675683 return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
676684 } catch (...) {
@@ -809,11 +817,10 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
809817 // It is not allowed to append to command list from multiple threads.
810818 std::scoped_lock<ur_shared_mutex> Guard (CommandBuffer->Mutex );
811819
812- if (checkImmediateAppendSupport (CommandBuffer->Context ,
813- CommandBuffer->Device )) {
814- finalizeImmediateAppendPath (CommandBuffer);
820+ if (CommandBuffer->UseImmediateAppendPath ) {
821+ UR_CALL (finalizeImmediateAppendPath (CommandBuffer));
815822 } else {
816- finalizeWaitEventPath (CommandBuffer);
823+ UR_CALL ( finalizeWaitEventPath (CommandBuffer) );
817824 }
818825
819826 // Close the command lists and have them ready for dispatch.
@@ -901,7 +908,7 @@ ur_result_t
901908createCommandHandle (ur_exp_command_buffer_handle_t CommandBuffer,
902909 ur_kernel_handle_t Kernel, uint32_t WorkDim,
903910 const size_t *LocalWorkSize,
904- ur_exp_command_buffer_command_handle_t * Command) {
911+ ur_exp_command_buffer_command_handle_t & Command) {
905912
906913 assert (CommandBuffer->IsUpdatable );
907914
@@ -923,7 +930,7 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer,
923930 DEBUG_LOG (CommandId);
924931
925932 try {
926- * Command = new ur_exp_command_buffer_command_handle_t_ (
933+ Command = new ur_exp_command_buffer_command_handle_t_ (
927934 CommandBuffer, CommandId, WorkDim, LocalWorkSize != nullptr , Kernel);
928935 } catch (const std::bad_alloc &) {
929936 return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
@@ -977,7 +984,7 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
977984
978985 if (Command && CommandBuffer->IsUpdatable ) {
979986 UR_CALL (createCommandHandle (CommandBuffer, Kernel, WorkDim, LocalWorkSize,
980- Command));
987+ * Command));
981988 }
982989
983990 std::vector<ze_event_handle_t > ZeEventList;
@@ -1384,7 +1391,7 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
13841391 * profiling.
13851392 * @return UR_RESULT_SUCCESS or an error code on failure.
13861393 */
1387- ur_result_t doProfiling (ur_exp_command_buffer_handle_t CommandBuffer,
1394+ ur_result_t appendProfilingQueries (ur_exp_command_buffer_handle_t CommandBuffer,
13881395 ze_command_list_handle_t CommandList,
13891396 ur_event_handle_t SignalEvent,
13901397 ur_event_handle_t WaitEvent) {
@@ -1431,8 +1438,8 @@ ur_result_t enqueueImmediateAppendPath(
14311438 if (!CommandBuffer->MCopyCommandListEmpty ) {
14321439 ur_command_list_ptr_t ZeCopyEngineImmediateListHelper{};
14331440 UR_CALL (Queue->Context ->getAvailableCommandList (
1434- Queue, ZeCopyEngineImmediateListHelper, true , NumEventsInWaitList,
1435- EventWaitList, false ));
1441+ Queue, ZeCopyEngineImmediateListHelper, true /* UseCopyEngine */ , NumEventsInWaitList,
1442+ EventWaitList, false /* AllowBatching */ , nullptr /* ForcedCmdQueue */ ));
14361443 assert (ZeCopyEngineImmediateListHelper->second .IsImmediate );
14371444
14381445 ZE2UR_CALL (zeCommandListImmediateAppendCommandListsExp,
@@ -1452,7 +1459,7 @@ ur_result_t enqueueImmediateAppendPath(
14521459 EventToSignal, WaitList.Length , WaitList.ZeEventList ));
14531460
14541461 if (DoProfiling) {
1455- UR_CALL (doProfiling (CommandBuffer, CommandListHelper->first , *Event,
1462+ UR_CALL (appendProfilingQueries (CommandBuffer, CommandListHelper->first , *Event,
14561463 CommandBuffer->ComputeFinishedEvent ));
14571464 }
14581465
@@ -1540,7 +1547,7 @@ ur_result_t enqueueWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer,
15401547 (SignalCommandList->first , CommandBuffer->AllResetEvent ->ZeEvent ));
15411548
15421549 if (DoProfiling) {
1543- UR_CALL (doProfiling (CommandBuffer, SignalCommandList->first , *Event,
1550+ UR_CALL (appendProfilingQueries (CommandBuffer, SignalCommandList->first , *Event,
15441551 CommandBuffer->ExecutionFinishedEvent ));
15451552 } else {
15461553 ZE2UR_CALL (zeCommandListAppendBarrier,
@@ -1570,15 +1577,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
15701577
15711578 ur_command_list_ptr_t ZeCommandListHelper{};
15721579 UR_CALL (UrQueue->Context ->getAvailableCommandList (
1573- UrQueue, ZeCommandListHelper, false , NumEventsInWaitList, EventWaitList,
1574- false ));
1580+ UrQueue, ZeCommandListHelper, false /* UseCopyEngine*/ ,
1581+ NumEventsInWaitList, EventWaitList, false /* AllowBatching*/ ,
1582+ nullptr /* ForcedCmdQueue*/ ));
15751583
15761584 UR_CALL (createEventAndAssociateQueue (
15771585 UrQueue, OutEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
15781586 ZeCommandListHelper, IsInternal, false , std::nullopt ));
15791587
1580- if (checkImmediateAppendSupport (CommandBuffer->Context ,
1581- CommandBuffer->Device )) {
1588+ if (CommandBuffer->UseImmediateAppendPath ) {
15821589 UR_CALL (enqueueImmediateAppendPath (
15831590 CommandBuffer, UrQueue, NumEventsInWaitList, EventWaitList, OutEvent,
15841591 ZeCommandListHelper, DoProfiling));
0 commit comments