Skip to content

Commit 23a7ab7

Browse files
Refactor implicit scaling barriers to add more cache flush options
Related-To: NEO-6262
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 1d52a72 commit 23a7ab7

File tree

7 files changed, with 42 additions (+42) and 29 deletions (-29).

opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_2.cpp

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -328,7 +328,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenMiLoadRegisterRegWhenItI
328328
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedThenItIsProperlySet) {
329329
auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
330330
void *pipeControlCAddress = cmdBufferAddress;
331-
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true);
331+
PipeControlArgs args(true);
332+
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, args);
332333
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlCAddress);
333334
ASSERT_NE(nullptr, pipeControl);
334335
EXPECT_EQ(expectedUsedSize, totalBytesProgrammed);
@@ -340,7 +341,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlComman
340341
HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlCommandWhenItIsProgrammedWithDcFlushFalseThenExpectDcFlushFlagFalse) {
341342
auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
342343
void *pipeControlCAddress = cmdBufferAddress;
343-
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, false);
344+
PipeControlArgs args(false);
345+
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, args);
344346
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlCAddress);
345347
ASSERT_NE(nullptr, pipeControl);
346348
EXPECT_EQ(expectedUsedSize, totalBytesProgrammed);
@@ -354,7 +356,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramPipeControlComman
354356
DebugManager.flags.DoNotFlushCaches.set(true);
355357
auto expectedUsedSize = sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
356358
void *pipeControlCAddress = cmdBufferAddress;
357-
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, true);
359+
PipeControlArgs args(true);
360+
WalkerPartition::programPipeControlCommand<FamilyType>(cmdBufferAddress, totalBytesProgrammed, args);
358361
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(pipeControlCAddress);
359362
ASSERT_NE(nullptr, pipeControl);
360363
EXPECT_EQ(expectedUsedSize, totalBytesProgrammed);
@@ -1368,10 +1371,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenDo
13681371
EXPECT_EQ(expectedOffsetSectionSize, computeBarrierControlSectionOffset<FamilyType>(testArgs));
13691372
EXPECT_EQ(expectedCommandUsedSize, estimateBarrierSpaceRequiredInCommandBuffer<FamilyType>(testArgs));
13701373

1374+
PipeControlArgs flushArgs(false);
13711375
WalkerPartition::constructBarrierCommandBuffer<FamilyType>(cmdBuffer,
13721376
gpuVirtualAddress,
13731377
totalBytesProgrammed,
1374-
testArgs);
1378+
testArgs,
1379+
flushArgs);
13751380

13761381
EXPECT_EQ(expectedCommandUsedSize, totalBytesProgrammed);
13771382

@@ -1415,7 +1420,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
14151420
testArgs.tileCount = 4u;
14161421
testArgs.emitSelfCleanup = true;
14171422
testArgs.secondaryBatchBuffer = true;
1418-
testArgs.dcFlush = true;
14191423

14201424
uint32_t totalBytesProgrammed = 0u;
14211425
uint64_t gpuVirtualAddress = 0xFF0000;
@@ -1434,10 +1438,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
14341438
EXPECT_EQ(expectedOffsetSectionSize, computeBarrierControlSectionOffset<FamilyType>(testArgs));
14351439
EXPECT_EQ(expectedCommandUsedSize, estimateBarrierSpaceRequiredInCommandBuffer<FamilyType>(testArgs));
14361440

1441+
PipeControlArgs flushArgs(true);
14371442
WalkerPartition::constructBarrierCommandBuffer<FamilyType>(cmdBuffer,
14381443
gpuVirtualAddress,
14391444
totalBytesProgrammed,
1440-
testArgs);
1445+
testArgs,
1446+
flushArgs);
14411447

14421448
EXPECT_EQ(expectedCommandUsedSize, totalBytesProgrammed);
14431449

@@ -1528,7 +1534,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
15281534
testArgs.tileCount = 4u;
15291535
testArgs.emitSelfCleanup = true;
15301536
testArgs.secondaryBatchBuffer = true;
1531-
testArgs.dcFlush = true;
15321537
testArgs.useAtomicsForSelfCleanup = true;
15331538

15341539
uint32_t totalBytesProgrammed = 0u;
@@ -1548,10 +1553,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenBarrierProgrammingWhenEm
15481553
EXPECT_EQ(expectedOffsetSectionSize, computeBarrierControlSectionOffset<FamilyType>(testArgs));
15491554
EXPECT_EQ(expectedCommandUsedSize, estimateBarrierSpaceRequiredInCommandBuffer<FamilyType>(testArgs));
15501555

1556+
PipeControlArgs flushArgs(true);
15511557
WalkerPartition::constructBarrierCommandBuffer<FamilyType>(cmdBuffer,
15521558
gpuVirtualAddress,
15531559
totalBytesProgrammed,
1554-
testArgs);
1560+
testArgs,
1561+
flushArgs);
15551562

15561563
EXPECT_EQ(expectedCommandUsedSize, totalBytesProgrammed);
15571564

shared/source/command_container/implicit_scaling.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ struct WalkerPartitionArgs;
1616

1717
namespace NEO {
1818
class LinearStream;
19+
struct PipeControlArgs;
1920

2021
namespace ImplicitScaling {
2122
extern bool apiSupport;
@@ -56,8 +57,8 @@ struct ImplicitScalingDispatch {
5657
static size_t getBarrierSize(bool apiSelfCleanup);
5758
static void dispatchBarrierCommands(LinearStream &commandStream,
5859
const DeviceBitfield &devices,
60+
PipeControlArgs &flushArgs,
5961
bool apiSelfCleanup,
60-
bool dcFlush,
6162
bool useSecondaryBatchBuffer);
6263

6364
private:

shared/source/command_container/implicit_scaling_xehp_and_later.inl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,14 +140,13 @@ size_t ImplicitScalingDispatch<GfxFamily>::getBarrierSize(bool apiSelfCleanup) {
140140
template <typename GfxFamily>
141141
void ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(LinearStream &commandStream,
142142
const DeviceBitfield &devices,
143+
PipeControlArgs &flushArgs,
143144
bool apiSelfCleanup,
144-
bool dcFlush,
145145
bool useSecondaryBatchBuffer) {
146146
uint32_t totalProgrammedSize = 0u;
147147

148148
WalkerPartition::WalkerPartitionArgs args = {};
149149
args.emitSelfCleanup = apiSelfCleanup;
150-
args.dcFlush = dcFlush;
151150
args.useAtomicsForSelfCleanup = ImplicitScalingHelper::isAtomicsUsedForSelfCleanup();
152151
args.tileCount = static_cast<uint32_t>(devices.count());
153152
args.secondaryBatchBuffer = useSecondaryBatchBuffer;
@@ -158,7 +157,8 @@ void ImplicitScalingDispatch<GfxFamily>::dispatchBarrierCommands(LinearStream &c
158157
WalkerPartition::constructBarrierCommandBuffer<GfxFamily>(commandBuffer,
159158
cmdBufferGpuAddress,
160159
totalProgrammedSize,
161-
args);
160+
args,
161+
flushArgs);
162162
commandStream.getSpace(totalProgrammedSize);
163163
}
164164

shared/source/command_container/walker_partition_interface.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ struct WalkerPartitionArgs {
2626
bool initializeWparidRegister = false;
2727
bool emitPipeControlStall = false;
2828
bool preferredStaticPartitioning = false;
29-
bool dcFlush = false;
3029
};
3130

3231
constexpr uint32_t wparidCCSOffset = 0x221C;

shared/source/command_container/walker_partition_xehp_and_later.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@
1414
#include "shared/source/helpers/hw_helper.h"
1515
#include "shared/source/helpers/ptr_math.h"
1616

17+
#include "pipe_control_args.h"
18+
1719
#include <cassert>
1820
#include <optional>
1921

22+
namespace NEO {
23+
struct PipeControlArgs;
24+
}
25+
2026
namespace WalkerPartition {
2127

2228
template <typename GfxFamily>
@@ -288,16 +294,10 @@ void programMiLoadRegisterMem(void *&inputAddress, uint32_t &totalBytesProgramme
288294
}
289295

290296
template <typename GfxFamily>
291-
void programPipeControlCommand(void *&inputAddress, uint32_t &totalBytesProgrammed, bool dcFlush) {
297+
void programPipeControlCommand(void *&inputAddress, uint32_t &totalBytesProgrammed, NEO::PipeControlArgs &args) {
292298
auto pipeControl = putCommand<PIPE_CONTROL<GfxFamily>>(inputAddress, totalBytesProgrammed);
293299
PIPE_CONTROL<GfxFamily> cmd = GfxFamily::cmdInitPipeControl;
294-
295-
if (NEO::MemorySynchronizationCommands<GfxFamily>::isDcFlushAllowed()) {
296-
cmd.setDcFlushEnable(dcFlush);
297-
}
298-
if (NEO::DebugManager.flags.DoNotFlushCaches.get()) {
299-
cmd.setDcFlushEnable(false);
300-
}
300+
NEO::MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
301301
*pipeControl = cmd;
302302
}
303303

@@ -538,7 +538,8 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer,
538538
}
539539

540540
if (args.emitPipeControlStall) {
541-
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true);
541+
NEO::PipeControlArgs args(true);
542+
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args);
542543
}
543544

544545
if (args.semaphoreProgrammingRequired) {
@@ -665,7 +666,8 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer,
665666
}
666667

667668
if (args.emitPipeControlStall) {
668-
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache
669+
NEO::PipeControlArgs args(true);
670+
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args);
669671
}
670672

671673
// Synchronize tiles after walker
@@ -746,7 +748,8 @@ template <typename GfxFamily>
746748
void constructBarrierCommandBuffer(void *cpuPointer,
747749
uint64_t gpuAddressOfAllocation,
748750
uint32_t &totalBytesProgrammed,
749-
WalkerPartitionArgs &args) {
751+
WalkerPartitionArgs &args,
752+
NEO::PipeControlArgs &flushArgs) {
750753
void *currentBatchBufferPointer = cpuPointer;
751754
const auto controlSectionOffset = computeBarrierControlSectionOffset<GfxFamily>(args);
752755

@@ -755,7 +758,7 @@ void constructBarrierCommandBuffer(void *cpuPointer,
755758
programSelfCleanupSection<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField, args.useAtomicsForSelfCleanup);
756759
}
757760

758-
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, args.dcFlush);
761+
programPipeControlCommand<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, flushArgs);
759762

760763
const auto crossTileSyncCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BarrierControlSection, crossTileSyncCount);
761764
programTilesSynchronizationWithAtomics<GfxFamily>(currentBatchBufferPointer, totalBytesProgrammed, crossTileSyncCountField, args.tileCount);

shared/source/helpers/hw_helper.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ struct MemorySynchronizationCommands {
418418
uint64_t immediateData,
419419
PipeControlArgs &args);
420420
static void setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo);
421+
static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
421422

422423
static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
423424
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
@@ -440,7 +441,6 @@ struct MemorySynchronizationCommands {
440441
static bool isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo);
441442

442443
protected:
443-
static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
444444
static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
445445
};
446446

shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
755755
estimatedSize = ImplicitScalingDispatch<FamilyType>::getBarrierSize(false);
756756
EXPECT_EQ(expectedSize, estimatedSize);
757757

758-
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, false, false, false);
758+
PipeControlArgs flushArgs(false);
759+
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, flushArgs, false, false);
759760
totalBytesProgrammed = commandStream.getUsed();
760761
EXPECT_EQ(expectedSize, totalBytesProgrammed);
761762

@@ -804,7 +805,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
804805
estimatedSize = ImplicitScalingDispatch<FamilyType>::getBarrierSize(true);
805806
EXPECT_EQ(expectedSize, estimatedSize);
806807

807-
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, true, true, true);
808+
PipeControlArgs flushArgs(true);
809+
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, flushArgs, true, true);
808810
totalBytesProgrammed = commandStream.getUsed();
809811
EXPECT_EQ(expectedSize, totalBytesProgrammed);
810812

@@ -856,7 +858,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
856858
estimatedSize = ImplicitScalingDispatch<FamilyType>::getBarrierSize(true);
857859
EXPECT_EQ(expectedSize, estimatedSize);
858860

859-
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, true, true, true);
861+
PipeControlArgs flushArgs(true);
862+
ImplicitScalingDispatch<FamilyType>::dispatchBarrierCommands(commandStream, twoTile, flushArgs, true, true);
860863
totalBytesProgrammed = commandStream.getUsed();
861864
EXPECT_EQ(expectedSize, totalBytesProgrammed);
862865

0 commit comments

Comments
 (0)