Skip to content

Commit 06f30d6

Browse files
refactor: move relaxed ordering logic to PVC file
Signed-off-by: Szymon Morek <[email protected]>
1 parent 6866723 commit 06f30d6

File tree

9 files changed

+105
-85
lines changed

9 files changed

+105
-85
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ struct CommandListCoreFamily : public CommandListImp {
441441
bool isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyEngineOperation) const;
442442
bool singleEventPacketRequired(bool inputSinglePacketEventRequest) const;
443443
void programEventL3Flush(Event *event);
444-
virtual ze_result_t flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) { return ZE_RESULT_SUCCESS; };
444+
// Default hook: does nothing and reports ZE_RESULT_SUCCESS.
// CommandListCoreFamilyImmediate overrides it. After this change the hook no longer
// takes a "wait required" flag; handleInOrderImplicitDependencies() only calls it when
// (dualStreamCopyOffloadOperation || relaxedOrderingAllowed) is true.
virtual ze_result_t flushInOrderCounterSignal() { return ZE_RESULT_SUCCESS; };
445445
bool isCopyOffloadAllowed(const NEO::GraphicsAllocation *srcAllocation, const NEO::GraphicsAllocation *dstAllocation, bool imageToBuffer) const;
446446
bool isSharedSystemEnabled() const;
447447
void emitMemAdviseForSystemCopy(const AlignedAllocationData &allocationStruct, size_t size);

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3058,9 +3058,11 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
30583058

30593059
template <GFXCORE_FAMILY gfxCoreFamily>
30603060
bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool dualStreamCopyOffloadOperation) {
3061-
auto ret = this->flushInOrderCounterSignal(dualStreamCopyOffloadOperation || relaxedOrderingAllowed);
3062-
if (ret != ZE_RESULT_SUCCESS) {
3063-
return ret;
3061+
if (dualStreamCopyOffloadOperation || relaxedOrderingAllowed) {
3062+
auto ret = this->flushInOrderCounterSignal();
3063+
if (ret != ZE_RESULT_SUCCESS) {
3064+
return ret;
3065+
}
30643066
}
30653067

30663068
if (hasInOrderDependencies()) {

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,14 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
245245
using BaseClass::inOrderExecInfo;
246246

247247
void printKernelsPrintfOutput(bool hangDetected);
248-
ze_result_t flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) override;
248+
ze_result_t flushInOrderCounterSignal() override;
249249
MOCKABLE_VIRTUAL ze_result_t synchronizeInOrderExecution(uint64_t timeout, bool copyOffloadSync) const;
250250
ze_result_t hostSynchronize(uint64_t timeout, bool handlePostWaitOperations);
251251
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
252252
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
253253
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
254-
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
254+
void handleInOrderNonWalkerSignaling(Event *event, bool hasRelaxedOrdering);
255+
bool handleRelaxedOrderingSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
255256
CommandQueue *getCmdQImmediate(CopyOffloadMode copyOffloadMode) const;
256257
NEO::LinearStream *getOptionalEpilogueCmdStream(NEO::LinearStream *taskCmdStream, NEO::AppendOperations appendOperation);
257258

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 16 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
590590

591591
if (isInOrderExecutionEnabled()) {
592592
// Skip only in base appendLaunchKernel(). Handle remaining operations here.
593-
handleInOrderNonWalkerSignaling(event, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, ret);
593+
bool hasRelaxedOrdering = handleRelaxedOrderingSignaling(event, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, ret);
594+
handleInOrderNonWalkerSignaling(event, hasRelaxedOrdering);
594595
}
595596
CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(event, true, false);
596597
}
@@ -599,26 +600,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
599600
}
600601

601602
template <GFXCORE_FAMILY gfxCoreFamily>
602-
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result) {
603-
bool nonWalkerSignalingHasRelaxedOrdering = false;
604-
605-
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
606-
auto counterValueBeforeSecondCheck = this->relaxedOrderingCounter;
607-
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
608-
this->relaxedOrderingCounter = counterValueBeforeSecondCheck; // dont increment twice
609-
}
610-
611-
if (nonWalkerSignalingHasRelaxedOrdering) {
612-
if (event && event->isCounterBased()) {
613-
event->hostEventSetValue(Event::STATE_INITIAL);
614-
}
615-
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, nullptr, false, nullptr, nullptr);
616-
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly(false));
617-
relaxedOrderingDispatch = true;
618-
hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch);
619-
}
620-
621-
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
603+
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignaling(Event *event, bool hasRelaxedOrdering) {
604+
// Reduced to the two append steps; the relaxed-ordering decision is now taken by the
// caller (appendLaunchKernel passes in the result of handleRelaxedOrderingSignaling()).
// hasRelaxedOrdering is forwarded as the third argument of the wait call.
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, hasRelaxedOrdering, false, CommandToPatch::Invalid);
622605
// Appends the in-order dependency counter signal for the same event, right after the wait.
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false, false, false);
623606
}
624607

@@ -1429,9 +1412,9 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(C
14291412
}
14301413

14311414
template <GFXCORE_FAMILY gfxCoreFamily>
1432-
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) {
1415+
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushInOrderCounterSignal() {
14331416
ze_result_t ret = ZE_RESULT_SUCCESS;
1434-
if (waitOnInOrderCounterRequired && !this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) {
1417+
if (!this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) {
14351418
this->appendSignalInOrderDependencyCounter(nullptr, false, true, false, false);
14361419
this->inOrderExecInfo->addCounterValue(this->getInOrderIncrementValue());
14371420
this->handleInOrderCounterOverflow(false);
@@ -1714,48 +1697,6 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
17141697
}
17151698
}
17161699

1717-
template <GFXCORE_FAMILY gfxCoreFamily>
1718-
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
1719-
const auto copyOffloadModeForOperation = getCopyOffloadModeForOperation(copyOffload);
1720-
1721-
auto csr = getCsr(copyOffload);
1722-
if (!csr->directSubmissionRelaxedOrderingEnabled()) {
1723-
return false;
1724-
}
1725-
1726-
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
1727-
1728-
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.get() != 0) {
1729-
uint32_t relaxedOrderingCounterThreshold = csr->getDirectSubmissionRelaxedOrderingQueueDepth();
1730-
1731-
auto queueTaskCount = getCmdQImmediate(copyOffloadModeForOperation)->getTaskCount();
1732-
auto csrTaskCount = csr->peekTaskCount();
1733-
1734-
bool skipTaskCountCheck = (csrTaskCount - queueTaskCount == 1) && csr->isLatestFlushIsTaskCountUpdateOnly();
1735-
1736-
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
1737-
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
1738-
}
1739-
1740-
if (queueTaskCount == csrTaskCount || skipTaskCountCheck) {
1741-
relaxedOrderingCounter++;
1742-
} else {
1743-
// Submission from another queue. Reset counter and keep relaxed ordering allowed
1744-
relaxedOrderingCounter = 0;
1745-
this->keepRelaxedOrderingEnabled = true;
1746-
}
1747-
1748-
if (relaxedOrderingCounter > static_cast<uint64_t>(relaxedOrderingCounterThreshold)) {
1749-
this->keepRelaxedOrderingEnabled = false;
1750-
return false;
1751-
}
1752-
1753-
return (keepRelaxedOrderingEnabled && (numEvents > 0));
1754-
}
1755-
1756-
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*csr, numEvents);
1757-
}
1758-
17591700
template <GFXCORE_FAMILY gfxCoreFamily>
17601701
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExecution(uint64_t timeout, bool copyOffloadSync) const {
17611702
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now;
@@ -2046,4 +1987,14 @@ size_t CommandListCoreFamilyImmediate<gfxCoreFamily>::estimateAdditionalSizeAppe
20461987
return additionalSize;
20471988
}
20481989

1990+
// Generic (non-specialized) implementation: relaxed ordering dispatch is never allowed.
// Both parameters are ignored here. The IGFX_XE_HPC_CORE explicit specialization in
// cmdlist_xe_hpc_core.cpp carries the real heuristic.
template <GFXCORE_FAMILY gfxCoreFamily>
1991+
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
1992+
return false;
1993+
}
1994+
1995+
// Generic (non-specialized) implementation: no relaxed-ordering handling is performed.
// None of the reference parameters (hasStallingCmds, relaxedOrderingDispatch, result) is
// modified and the function always returns false. The IGFX_XE_HPC_CORE explicit
// specialization in cmdlist_xe_hpc_core.cpp provides the active behavior.
template <GFXCORE_FAMILY gfxCoreFamily>
1996+
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::handleRelaxedOrderingSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result) {
1997+
return false;
1998+
}
1999+
20492000
} // namespace L0

level_zero/core/source/cmdlist/cmdlist_hw_xe2_hpg_and_later.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,5 @@ void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t a
3535
writeTimestamp(commandContainer, registerOffset, highAddress, false, workloadPartition, postSyncCmdBuffer, copyOperation);
3636
pushTimestampPatch(outTimeStampSyncCmds, highAddress - baseAddress, postSyncCmd);
3737
}
38+
3839
} // namespace L0

level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
89
#include "shared/source/memory_manager/prefetch_manager.h"
910
#include "shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h"
1011

@@ -22,6 +23,70 @@
2223

2324
namespace L0 {
2425

26+
// IGFX_XE_HPC_CORE specialization of isRelaxedOrderingDispatchAllowed(); it replaces the
// generic always-false implementation from cmdlist_hw_immediate.inl.
//
// numWaitEvents - number of wait events of the operation being appended.
// copyOffload   - selects which CSR / immediate queue is inspected (via getCsr() and
//                 getCopyOffloadModeForOperation()).
// Returns true when the dispatch may use relaxed ordering.
//
// Side effects: on the counter-heuristic path this updates relaxedOrderingCounter and
// keepRelaxedOrderingEnabled, so the call is not idempotent.
template <>
27+
bool CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
28+
const auto copyOffloadModeForOperation = getCopyOffloadModeForOperation(copyOffload);
29+
30+
auto csr = getCsr(copyOffload);
31+
// Never allowed when the CSR reports relaxed ordering as disabled for direct submission.
if (!csr->directSubmissionRelaxedOrderingEnabled()) {
32+
return false;
33+
}
34+
35+
// An existing in-order dependency is counted as one additional event.
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
36+
37+
// Counter heuristic, active whenever the debug flag is non-zero.
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.get() != 0) {
38+
// Default threshold comes from the CSR; it may be overridden by a debug flag below.
uint32_t relaxedOrderingCounterThreshold = csr->getDirectSubmissionRelaxedOrderingQueueDepth();
39+
40+
auto queueTaskCount = getCmdQImmediate(copyOffloadModeForOperation)->getTaskCount();
41+
auto csrTaskCount = csr->peekTaskCount();
42+
43+
// CSR exactly one task ahead of this queue, with the CSR reporting
// isLatestFlushIsTaskCountUpdateOnly(): treated the same as equal task counts.
bool skipTaskCountCheck = (csrTaskCount - queueTaskCount == 1) && csr->isLatestFlushIsTaskCountUpdateOnly();
44+
45+
// -1 means "no override" for the threshold debug flag.
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
46+
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
47+
}
48+
49+
if (queueTaskCount == csrTaskCount || skipTaskCountCheck) {
50+
relaxedOrderingCounter++;
51+
} else {
52+
// Submission from another queue. Reset counter and keep relaxed ordering allowed
53+
relaxedOrderingCounter = 0;
54+
this->keepRelaxedOrderingEnabled = true;
55+
}
56+
57+
// Counter above the threshold: disable relaxed ordering until the counter is reset
// by the else-branch above.
if (relaxedOrderingCounter > static_cast<uint64_t>(relaxedOrderingCounterThreshold)) {
58+
this->keepRelaxedOrderingEnabled = false;
59+
return false;
60+
}
61+
62+
// Even when still enabled, at least one event/dependency is required.
return (keepRelaxedOrderingEnabled && (numEvents > 0));
63+
}
64+
65+
// Heuristic disabled: defer to the shared helper's decision.
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*csr, numEvents);
66+
}
67+
68+
// IGFX_XE_HPC_CORE specialization of handleRelaxedOrderingSignaling(); it holds the
// relaxed-ordering part that this commit removed from handleInOrderNonWalkerSignaling().
//
// event                   - event being signaled; may be null (it is null-checked below).
// hasStallingCmds         - in/out: passed to flushImmediate(), then recomputed.
// relaxedOrderingDispatch - in/out: passed to flushImmediate(), then set to true.
// result                  - in/out: fed into and overwritten by flushImmediate().
// Returns whether relaxed ordering is used for the non-walker signaling; the reference
// parameters are only touched when the return value is true.
template <>
69+
bool CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE>::handleRelaxedOrderingSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result) {
70+
bool nonWalkerSignalingHasRelaxedOrdering = false;
71+
72+
// Only evaluated when the events-chaining debug flag is non-zero.
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
73+
// isRelaxedOrderingDispatchAllowed() may bump relaxedOrderingCounter; save and restore
// it so this extra query does not count as another dispatch.
auto counterValueBeforeSecondCheck = this->relaxedOrderingCounter;
74+
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
75+
this->relaxedOrderingCounter = counterValueBeforeSecondCheck; // don't increment twice
76+
}
77+
78+
if (nonWalkerSignalingHasRelaxedOrdering) {
79+
// Counter-based events get their host value reset to STATE_INITIAL before the flush.
if (event && event->isCounterBased()) {
80+
event->hostEventSetValue(Event::STATE_INITIAL);
81+
}
82+
// Flush what has been recorded so far, using the flags as they are on entry.
result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, nullptr, false, nullptr, nullptr);
83+
// Encode the relaxed-ordering registers into the command stream after the flush.
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*this->commandContainer.getCommandStream(), isCopyOnly(false));
84+
relaxedOrderingDispatch = true;
85+
hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch);
86+
}
87+
return nonWalkerSignalingHasRelaxedOrdering;
88+
}
89+
2590
template struct CommandListCoreFamily<IGFX_XE_HPC_CORE>;
2691
template struct CommandListCoreFamilyImmediate<IGFX_XE_HPC_CORE>;
2792

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,7 @@ HWTEST_F(CommandListCreateTests, givenDebugFlagSetWhenCallingSynchronizeThenDont
17491749
zeEventDestroy(event);
17501750
}
17511751

1752-
HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
1752+
HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsXeHpcCore) {
17531753
bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();
17541754

17551755
DebugManagerStateRestore restore;
@@ -1892,7 +1892,7 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch
18921892
driverHandle->releaseImportedPointer(dstPtr);
18931893
}
18941894

1895-
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrderingWithoutInputEventsThenCountPreviousEventAsWaitlist, IsAtLeastXeHpcCore) {
1895+
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrderingWithoutInputEventsThenCountPreviousEventAsWaitlist, IsXeHpcCore) {
18961896
bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();
18971897

18981898
DebugManagerStateRestore restore;
@@ -1953,7 +1953,7 @@ HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrd
19531953
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
19541954
}
19551955

1956-
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingBarrierThenAllowForRelaxedOrdering, IsAtLeastXeHpcCore) {
1956+
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingBarrierThenAllowForRelaxedOrdering, IsXeHpcCore) {
19571957
bool useImmediateFlushTask = getHelper<L0GfxCoreHelper>().platformSupportsImmediateComputeFlushTask();
19581958

19591959
DebugManagerStateRestore restore;
@@ -2106,7 +2106,7 @@ HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingBarrierWit
21062106
EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasStallingCmds);
21072107
}
21082108

2109-
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrderingThenProgramConditionalBbStart, IsAtLeastXeHpcCore) {
2109+
HWTEST2_F(CommandListCreateTests, givenInOrderExecutionWhenDispatchingRelaxedOrderingThenProgramConditionalBbStart, IsXeHpcCore) {
21102110
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
21112111

21122112
DebugManagerStateRestore restore;

0 commit comments

Comments
 (0)