Skip to content

Commit 8de043b

Browse files
Stop redundant SBA programming due to global atomics
For all platforms other than XE_HP_SDV (ATS), stop considering the `useGlobalAtomics` flag as a decisive factor for triggering the SBA (StateBaseAddress) programming on the HW. Only XE_HP_SDV supports this flag. For consistency of the implementation, keep the related logic in one place only, namely a helper in `command_encoder`, and then just reuse it in different places (`command_stream_receiver`). Related-To: NEO-6953 Signed-off-by: Maciej Bielski <[email protected]>
1 parent 1e0f0ef commit 8de043b

File tree

10 files changed

+94
-91
lines changed

10 files changed

+94
-91
lines changed

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp

Lines changed: 0 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -637,90 +637,6 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableGrfConfigWhenFlu
637637
}
638638
}
639639

640-
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenMultiOsContextCapableSetAndDispatchFlagsWhenFlushingTaskThenReloadSbaProperly) {
641-
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
642-
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
643-
644-
// 1. Ignore dispatchFlags.useGlobalAtomics flip if csr is not multi context capable
645-
commandStreamReceiver.multiOsContextCapable = false;
646-
647-
flushTaskFlags.useGlobalAtomics = false;
648-
auto offset = commandStreamReceiver.commandStream.getUsed();
649-
flushTask(commandStreamReceiver);
650-
651-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
652-
auto stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
653-
EXPECT_NE(cmdList.end(), stateBaseAddressItor);
654-
655-
flushTaskFlags.useGlobalAtomics ^= true;
656-
offset = commandStreamReceiver.commandStream.getUsed();
657-
flushTask(commandStreamReceiver);
658-
659-
cmdList.clear();
660-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
661-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
662-
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
663-
664-
// 2. Reprogram SBA only if dispatchFlags.useGlobalAtomics flips and csr is multi context capable or context has several devices
665-
commandStreamReceiver.multiOsContextCapable = true;
666-
667-
flushTaskFlags.useGlobalAtomics = true;
668-
flushTaskFlags.areMultipleSubDevicesInContext = false;
669-
offset = commandStreamReceiver.commandStream.getUsed();
670-
flushTask(commandStreamReceiver);
671-
672-
cmdList.clear();
673-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
674-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
675-
EXPECT_NE(cmdList.end(), stateBaseAddressItor);
676-
677-
flushTaskFlags.useGlobalAtomics ^= true;
678-
offset = commandStreamReceiver.commandStream.getUsed();
679-
flushTask(commandStreamReceiver);
680-
681-
cmdList.clear();
682-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
683-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
684-
EXPECT_NE(cmdList.end(), stateBaseAddressItor);
685-
686-
offset = commandStreamReceiver.commandStream.getUsed();
687-
flushTask(commandStreamReceiver);
688-
689-
cmdList.clear();
690-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
691-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
692-
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
693-
694-
commandStreamReceiver.multiOsContextCapable = false;
695-
696-
flushTaskFlags.useGlobalAtomics = true;
697-
flushTaskFlags.areMultipleSubDevicesInContext = true;
698-
offset = commandStreamReceiver.commandStream.getUsed();
699-
flushTask(commandStreamReceiver);
700-
701-
cmdList.clear();
702-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
703-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
704-
EXPECT_NE(cmdList.end(), stateBaseAddressItor);
705-
706-
flushTaskFlags.useGlobalAtomics ^= true;
707-
offset = commandStreamReceiver.commandStream.getUsed();
708-
flushTask(commandStreamReceiver);
709-
710-
cmdList.clear();
711-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
712-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
713-
EXPECT_NE(cmdList.end(), stateBaseAddressItor);
714-
715-
offset = commandStreamReceiver.commandStream.getUsed();
716-
flushTask(commandStreamReceiver);
717-
718-
cmdList.clear();
719-
parseCommands<FamilyType>(commandStreamReceiver.commandStream, offset);
720-
stateBaseAddressItor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
721-
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
722-
}
723-
724640
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentWhenFlushingTaskThenPreambleIsSent) {
725641
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
726642
commandStreamReceiver.isPreambleSent = false;

opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,71 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, StateBaseAddressXeHPAndLaterTests, givenNonZeroInte
489489
memoryManager->freeGraphicsMemory(allocation);
490490
}
491491

492+
namespace {
493+
494+
template <typename FamilyType, typename CommandStreamReceiverType>
495+
void flushTaskAndcheckForSBA(StateBaseAddressXeHPAndLaterTests *sbaTest, CommandStreamReceiverType &csr, bool shouldBePresent) {
496+
size_t offset = csr.commandStream.getUsed();
497+
498+
sbaTest->flushTask(csr);
499+
500+
HardwareParse hwParserCsr;
501+
hwParserCsr.parseCommands<FamilyType>(csr.commandStream, offset);
502+
hwParserCsr.findHardwareCommands<FamilyType>();
503+
if (shouldBePresent) {
504+
EXPECT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
505+
} else {
506+
EXPECT_EQ(nullptr, hwParserCsr.cmdStateBaseAddress);
507+
}
508+
}
509+
510+
template <typename FamilyType>
511+
void testGlobalAtomicsImpactOnSBA(StateBaseAddressXeHPAndLaterTests *sbaTest, bool multiOsCtx, bool multiSubDevices, bool expectSBA) {
512+
513+
auto &commandStreamReceiver = sbaTest->pDevice->getUltCommandStreamReceiver<FamilyType>();
514+
commandStreamReceiver.multiOsContextCapable = multiOsCtx;
515+
sbaTest->flushTaskFlags.areMultipleSubDevicesInContext = multiSubDevices;
516+
517+
flushTaskAndcheckForSBA<FamilyType>(sbaTest, commandStreamReceiver, true);
518+
flushTaskAndcheckForSBA<FamilyType>(sbaTest, commandStreamReceiver, false);
519+
520+
commandStreamReceiver.lastSentUseGlobalAtomics ^= true;
521+
flushTaskAndcheckForSBA<FamilyType>(sbaTest, commandStreamReceiver, expectSBA);
522+
flushTaskAndcheckForSBA<FamilyType>(sbaTest, commandStreamReceiver, false);
523+
524+
commandStreamReceiver.lastSentUseGlobalAtomics ^= true;
525+
flushTaskAndcheckForSBA<FamilyType>(sbaTest, commandStreamReceiver, expectSBA);
526+
}
527+
528+
} /* namespace */
529+
530+
struct XeHpGlobalAtomicsStateBaseAddressTests : public StateBaseAddressXeHPAndLaterTests,
531+
public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
532+
533+
HWTEST2_P(XeHpGlobalAtomicsStateBaseAddressTests, givenMultiOSContextOrMultiSubDeviceWhenLastSentUseGlobalAtomicsIsFlippedThenStatBaseAddressIsReprorammed, IsXEHP) {
534+
auto [multiOsCtx, multiSubDevices] = GetParam();
535+
testGlobalAtomicsImpactOnSBA<FamilyType>(this, multiOsCtx, multiSubDevices, multiOsCtx || multiSubDevices);
536+
}
537+
538+
INSTANTIATE_TEST_CASE_P(XeHpGlobalAtomicsStateBaseAddress,
539+
XeHpGlobalAtomicsStateBaseAddressTests,
540+
::testing::Combine(
541+
::testing::Bool(),
542+
::testing::Bool()));
543+
544+
using NonXeHpGlobalAtomicsStateBaseAddressTests = XeHpGlobalAtomicsStateBaseAddressTests;
545+
546+
HWTEST2_P(NonXeHpGlobalAtomicsStateBaseAddressTests, givenAnyMultiOSContextValueWithAnySubDeviceNumberWhenLastSentUseGlobalAtomicsIsFlippedThenStatBaseAddressProgrammingIsNeverAffected, IsNotXEHP) {
547+
auto [multiOsCtx, multiSubDevices] = GetParam();
548+
testGlobalAtomicsImpactOnSBA<FamilyType>(this, multiOsCtx, multiSubDevices, false);
549+
}
550+
551+
INSTANTIATE_TEST_CASE_P(NonXeHpGlobalAtomicsStateBaseAddress,
552+
NonXeHpGlobalAtomicsStateBaseAddressTests,
553+
::testing::Combine(
554+
::testing::Bool(),
555+
::testing::Bool()));
556+
492557
using RenderSurfaceStateXeHPAndLaterTests = XeHpCommandStreamReceiverFlushTaskTests;
493558

494559
HWCMDTEST_F(IGFX_XE_HP_CORE, RenderSurfaceStateXeHPAndLaterTests, givenSpecificProductFamilyWhenAppendingRssThenProgramGpuCoherency) {

shared/source/command_container/command_encoder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ struct EncodeDispatchKernel {
107107
static void setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
108108

109109
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder);
110+
111+
static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);
110112
};
111113

112114
template <typename GfxFamily>

shared/source/command_container/command_encoder.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,9 @@ void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPT
674674
template <typename Family>
675675
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
676676

677+
template <typename Family>
678+
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; }
679+
677680
template <typename Family>
678681
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
679682
for (int i = 0; i < 3; ++i) {

shared/source/command_container/command_encoder_xehp_and_later.inl

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
204204
}
205205
}
206206

207-
bool requiresGlobalAtomicsUpdate = false;
208-
if (args.partitionCount > 1) {
209-
requiresGlobalAtomicsUpdate = container.lastSentUseGlobalAtomics != args.useGlobalAtomics;
210-
container.lastSentUseGlobalAtomics = args.useGlobalAtomics;
211-
}
207+
if (shouldUpdateGlobalAtomics(container.lastSentUseGlobalAtomics, args.useGlobalAtomics, args.partitionCount > 1) ||
208+
container.isAnyHeapDirty() ||
209+
args.requiresUncachedMocs) {
212210

213-
if (container.isAnyHeapDirty() || args.requiresUncachedMocs || requiresGlobalAtomicsUpdate) {
214211
PipeControlArgs syncArgs;
215212
syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
216213
MemorySynchronizationCommands<Family>::addPipeControl(*container.getCommandStream(), syncArgs);

shared/source/command_stream/command_stream_receiver_hw.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
177177
bool checkPlatformSupportsGpuIdleImplicitFlush() const;
178178
void configurePostSyncWriteOffset();
179179
void unregisterDirectSubmissionFromController();
180+
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
180181

181182
HeapDirtyState dshState;
182183
HeapDirtyState iohState;

shared/source/command_stream/command_stream_receiver_hw_base.inl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
362362
latestSentStatelessMocsConfig = mocsIndex;
363363
}
364364

365-
if ((isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext) && (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics)) {
365+
if (this->isGlobalAtomicsProgrammingRequired(dispatchFlags.useGlobalAtomics) && (this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext)) {
366366
isStateBaseAddressDirty = true;
367367
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
368368
}
@@ -1418,4 +1418,8 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
14181418
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekHwInfo(), hasSharedHandles(), isRcs());
14191419
}
14201420

1421+
template <typename GfxFamily>
1422+
constexpr bool CommandStreamReceiverHw<GfxFamily>::isGlobalAtomicsProgrammingRequired(bool currentVal) const {
1423+
return false;
1424+
}
14211425
} // namespace NEO

shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
7373
}
7474
}
7575

76+
template <>
77+
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool predicate) {
78+
if (predicate && currentVal != refVal) {
79+
currentVal = refVal;
80+
return true;
81+
}
82+
return false;
83+
}
84+
7685
template struct EncodeDispatchKernel<Family>;
7786
template struct EncodeStates<Family>;
7887
template struct EncodeMath<Family>;

shared/source/xe_hp_core/command_stream_receiver_hw_xe_hp_core.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ size_t CommandStreamReceiverHw<Family>::getCmdSizeForPerDssBackedBuffer(const Ha
7070
template <>
7171
void CommandStreamReceiverHw<Family>::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) {}
7272

73+
template <>
74+
constexpr bool CommandStreamReceiverHw<Family>::isGlobalAtomicsProgrammingRequired(bool currentValue) const {
75+
return currentValue != this->lastSentUseGlobalAtomics;
76+
}
77+
7378
template <>
7479
void BlitCommandsHelper<Family>::appendClearColor(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd) {
7580
using XY_BLOCK_COPY_BLT = typename Family::XY_BLOCK_COPY_BLT;

shared/test/common/test_macros/header/common_matchers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ using IsADLP = IsProduct<IGFX_ALDERLAKE_P>;
5959
using IsRKL = IsProduct<IGFX_ROCKETLAKE>;
6060

6161
using IsXEHP = IsProduct<IGFX_XE_HP_SDV>;
62+
using IsNotXEHP = IsNotWithinProducts<IGFX_XE_HP_SDV, IGFX_XE_HP_SDV>;
6263

6364
using IsDG2 = IsProduct<IGFX_DG2>;
6465

0 commit comments

Comments
 (0)