@@ -327,6 +327,11 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::waitForEventsFromHost() {
327327 return true ;
328328}
329329
330+ template <GFXCORE_FAMILY gfxCoreFamily>
331+ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) {
332+ return (!relaxedOrderingDispatch && (numWaitEvents > 0 || isInOrderExecutionEnabled ()));
333+ }
334+
330335template <GFXCORE_FAMILY gfxCoreFamily>
331336ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
332337 ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions,
@@ -350,7 +355,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
350355 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel (kernelHandle, threadGroupDimensions,
351356 hSignalEvent, numWaitEvents, phWaitEvents,
352357 launchParams, relaxedOrderingDispatch);
353- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
358+
359+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
354360}
355361
356362template <GFXCORE_FAMILY gfxCoreFamily>
@@ -366,7 +372,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
366372
367373 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect (kernelHandle, pDispatchArgumentsBuffer,
368374 hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
369- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
375+
376+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
370377}
371378
372379template <GFXCORE_FAMILY gfxCoreFamily>
@@ -401,6 +408,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
401408 checkWaitEventsState (numWaitEvents, phWaitEvents);
402409 }
403410
411+ bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch);
412+
404413 ze_result_t ret;
405414 CpuMemCopyInfo cpuMemCopyInfo (dstptr, srcptr, size);
406415 this ->device ->getDriverHandle ()->findAllocationDataForRange (const_cast <void *>(srcptr), size, &cpuMemCopyInfo.srcAllocData );
@@ -416,14 +425,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
416425 auto isSplitNeeded = this ->isAppendSplitNeeded (dstptr, srcptr, size, direction);
417426 if (isSplitNeeded) {
418427 relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (1 ); // split generates more than 1 event
428+ hasStallindCmds = !relaxedOrderingDispatch;
429+
419430 ret = static_cast <DeviceImp *>(this ->device )->bcsSplit .appendSplitCall <gfxCoreFamily, void *, const void *>(this , dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true , relaxedOrderingDispatch, direction, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
420431 return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy (dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u , nullptr , relaxedOrderingDispatch);
421432 });
422433 } else {
423434 ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy (dstptr, srcptr, size, hSignalEvent,
424435 numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
425436 }
426- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
437+
438+ return flushImmediate (ret, true , hasStallindCmds, relaxedOrderingDispatch, hSignalEvent);
427439}
428440
429441template <GFXCORE_FAMILY gfxCoreFamily>
@@ -446,12 +458,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
446458 checkWaitEventsState (numWaitEvents, phWaitEvents);
447459 }
448460
461+ bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch);
462+
449463 ze_result_t ret;
450464
451465 NEO::TransferDirection direction;
452466 auto isSplitNeeded = this ->isAppendSplitNeeded (dstPtr, srcPtr, this ->getTotalSizeForCopyRegion (dstRegion, dstPitch, dstSlicePitch), direction);
453467 if (isSplitNeeded) {
454468 relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed (1 ); // split generates more than 1 event
469+ hasStallindCmds = !relaxedOrderingDispatch;
470+
455471 ret = static_cast <DeviceImp *>(this ->device )->bcsSplit .appendSplitCall <gfxCoreFamily, uint32_t , uint32_t >(this , dstRegion->originX , srcRegion->originX , dstRegion->width , hSignalEvent, numWaitEvents, phWaitEvents, true , relaxedOrderingDispatch, direction, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
456472 ze_copy_region_t dstRegionLocal = {};
457473 ze_copy_region_t srcRegionLocal = {};
@@ -471,7 +487,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
471487 hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
472488 }
473489
474- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
490+ return flushImmediate (ret, true , hasStallindCmds , relaxedOrderingDispatch, hSignalEvent);
475491}
476492
477493template <GFXCORE_FAMILY gfxCoreFamily>
@@ -489,7 +505,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
489505
490506 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill (ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
491507
492- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
508+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch) , relaxedOrderingDispatch, hSignalEvent);
493509}
494510
495511template <GFXCORE_FAMILY gfxCoreFamily>
@@ -616,7 +632,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
616632
617633 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion (hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
618634 numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
619- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
635+
636+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
620637}
621638
622639template <GFXCORE_FAMILY gfxCoreFamily>
@@ -637,7 +654,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
637654 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory (hDstImage, srcPtr, pDstRegion, hSignalEvent,
638655 numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
639656
640- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
657+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch) , relaxedOrderingDispatch, hSignalEvent);
641658}
642659
643660template <GFXCORE_FAMILY gfxCoreFamily>
@@ -658,7 +675,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
658675 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory (dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
659676 numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
660677
661- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
678+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch) , relaxedOrderingDispatch, hSignalEvent);
662679}
663680
664681template <GFXCORE_FAMILY gfxCoreFamily>
@@ -690,7 +707,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
690707 }
691708
692709 auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel (kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch);
693- return flushImmediate (ret, true , false , relaxedOrderingDispatch, hSignalEvent);
710+
711+ return flushImmediate (ret, true , hasStallingCmdsForRelaxedOrdering (numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, hSignalEvent);
694712}
695713
696714template <GFXCORE_FAMILY gfxCoreFamily>
0 commit comments