Skip to content

Commit 4fbb199

Browse files
Add platform parameter to configure pipe control dispatch
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 57344fc commit 4fbb199

21 files changed

+812 -170 lines changed

level_zero/core/source/helpers/implicit_scaling_l0.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,5 @@
1010
namespace NEO {
1111
namespace ImplicitScaling {
1212
bool apiSupport = false;
13-
bool semaphoreProgrammingRequired = false;
14-
bool crossTileAtomicSynchronization = true;
1513
} // namespace ImplicitScaling
1614
} // namespace NEO

opencl/source/helpers/implicit_scaling_ocl.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,5 @@
1010
namespace NEO {
1111
namespace ImplicitScaling {
1212
bool apiSupport = true;
13-
bool semaphoreProgrammingRequired = false;
14-
bool crossTileAtomicSynchronization = true;
1513
} // namespace ImplicitScaling
1614
} // namespace NEO

opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp

Lines changed: 91 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/command_container/implicit_scaling.h"
89
#include "shared/source/command_container/walker_partition_xehp_and_later.h"
910
#include "shared/source/command_stream/linear_stream.h"
1011
#include "shared/source/gmm_helper/gmm_helper.h"
@@ -1066,8 +1067,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
10661067
memoryManager->freeGraphicsMemory(kernel->kernelInfo.kernelAllocation);
10671068
}
10681069

1069-
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
1070+
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
10701071
DebugManager.flags.EnableWalkerPartition.set(1u);
1072+
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
10711073
UltClDeviceFactory deviceFactory{1, 2};
10721074
MockClDevice *device = deviceFactory.rootDevices[0];
10731075
MockContext context{device};
@@ -1122,6 +1124,63 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart
11221124
EXPECT_EQ(returnedSize, partitionSize + baseSize);
11231125
}
11241126

1127+
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenWalkerPartitionIsOnThenSizeIsProperlyEstimated) {
1128+
DebugManager.flags.EnableWalkerPartition.set(1u);
1129+
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
1130+
UltClDeviceFactory deviceFactory{1, 2};
1131+
MockClDevice *device = deviceFactory.rootDevices[0];
1132+
MockContext context{device};
1133+
1134+
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, device, nullptr);
1135+
auto &csr = cmdQ->getUltCommandStreamReceiver();
1136+
1137+
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;
1138+
1139+
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
1140+
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
1141+
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
1142+
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);
1143+
1144+
DispatchInfo dispatchInfo{};
1145+
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
1146+
1147+
WalkerPartition::WalkerPartitionArgs testArgs = {};
1148+
testArgs.initializeWparidRegister = true;
1149+
testArgs.crossTileAtomicSynchronization = false;
1150+
testArgs.emitPipeControlStall = false;
1151+
testArgs.partitionCount = 2u;
1152+
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
1153+
1154+
DebugManager.flags.SynchronizeWalkerInWparidMode.set(0);
1155+
testArgs.staticPartitioning = false;
1156+
testArgs.synchronizeBeforeExecution = false;
1157+
csr.staticWorkPartitioningEnabled = false;
1158+
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
1159+
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
1160+
EXPECT_EQ(returnedSize, partitionSize + baseSize);
1161+
1162+
testArgs.staticPartitioning = true;
1163+
csr.staticWorkPartitioningEnabled = true;
1164+
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
1165+
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
1166+
EXPECT_EQ(returnedSize, partitionSize + baseSize);
1167+
1168+
DebugManager.flags.SynchronizeWalkerInWparidMode.set(1);
1169+
testArgs.synchronizeBeforeExecution = true;
1170+
testArgs.staticPartitioning = false;
1171+
csr.staticWorkPartitioningEnabled = false;
1172+
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
1173+
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
1174+
EXPECT_EQ(returnedSize, partitionSize + baseSize);
1175+
1176+
testArgs.synchronizeBeforeExecution = true;
1177+
testArgs.staticPartitioning = true;
1178+
csr.staticWorkPartitioningEnabled = true;
1179+
partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
1180+
returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
1181+
EXPECT_EQ(returnedSize, partitionSize + baseSize);
1182+
}
1183+
11251184
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPartitionIsDisabledThenSizeIsProperlyEstimated) {
11261185
DebugManager.flags.EnableWalkerPartition.set(0u);
11271186
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
@@ -1157,8 +1216,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenPipeContro
11571216
EXPECT_EQ(returnedSize, baseSize);
11581217
}
11591218

1160-
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
1219+
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
11611220
DebugManager.flags.EnableWalkerPartition.set(1u);
1221+
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
11621222

11631223
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
11641224

@@ -1185,6 +1245,35 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenQueueIsMul
11851245
EXPECT_EQ(returnedSize, partitionSize + baseSize);
11861246
}
11871247

1248+
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeControlIsNotRequiredWhenQueueIsMultiEngineCapableThenWalkerPartitionsAreEstimated) {
1249+
DebugManager.flags.EnableWalkerPartition.set(1u);
1250+
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), false);
1251+
1252+
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
1253+
1254+
size_t numPipeControls = MemorySynchronizationCommands<FamilyType>::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1;
1255+
1256+
auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) +
1257+
(sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) +
1258+
HardwareCommandsHelper<FamilyType>::getSizeRequiredCS() +
1259+
EncodeMemoryPrefetch<FamilyType>::getSizeForMemoryPrefetch(kernel->kernelInfo.heapInfo.KernelHeapSize);
1260+
1261+
WalkerPartition::WalkerPartitionArgs testArgs = {};
1262+
testArgs.initializeWparidRegister = true;
1263+
testArgs.emitPipeControlStall = false;
1264+
testArgs.crossTileAtomicSynchronization = false;
1265+
testArgs.partitionCount = 16u;
1266+
testArgs.tileCount = static_cast<uint32_t>(device->getDeviceBitfield().count());
1267+
1268+
auto partitionSize = WalkerPartition::estimateSpaceRequiredInCommandBuffer<FamilyType>(testArgs);
1269+
1270+
DispatchInfo dispatchInfo{};
1271+
dispatchInfo.setNumberOfWorkgroups({32, 1, 1});
1272+
1273+
auto returnedSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *cmdQ.get(), kernel->mockKernel, dispatchInfo);
1274+
EXPECT_EQ(returnedSize, partitionSize + baseSize);
1275+
}
1276+
11881277
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenProgramWalkerIsCalledThenWalkerPartitionLogicIsExecuted) {
11891278
if (!OSInterface::osEnableLocalMemory) {
11901279
GTEST_SKIP();

opencl/test/unit_test/command_queue/enqueue_with_walker_partition_tests.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/command_container/implicit_scaling.h"
89
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
910
#include "shared/test/common/cmd_parse/hw_parse.h"
1011
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -39,9 +40,12 @@ struct EnqueueWithWalkerPartitionTests : public ::testing::Test {
3940
std::unique_ptr<MockContext> context;
4041
};
4142

42-
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueWithWalkerPartitionTests, givenCsrWithSpecificNumberOfTilesWhenDispatchingThenConstructCmdBufferForAllSupportedTiles) {
43+
HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueWithWalkerPartitionTests,
44+
givenCsrWithSpecificNumberOfTilesAndPipeControlWithStallRequiredWhenDispatchingThenConstructCmdBufferForAllSupportedTiles) {
4345
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
4446

47+
VariableBackup<bool> pipeControlConfigBackup(&ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired(), true);
48+
4549
MockCommandQueueHw<FamilyType> commandQueue(context.get(), rootDevice.get(), nullptr);
4650
commandQueue.gpgpuEngine = &engineControlForFusedQueue;
4751
rootDevice->setPreemptionMode(PreemptionMode::Disabled);

opencl/test/unit_test/command_queue/walker_partition_tests_xehp_and_later_1.cpp

Lines changed: 5 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -339,7 +339,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
339339
&walker,
340340
totalBytesProgrammed,
341341
testArgs);
342-
EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed);
342+
EXPECT_EQ(controlSectionOffset, totalBytesProgrammed);
343343

344344
auto parsedOffset = 0u;
345345
{
@@ -399,20 +399,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd
399399
EXPECT_EQ(MI_SEMAPHORE_WAIT<FamilyType>::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, miSemaphoreWait->getCompareOperation());
400400
EXPECT_EQ(1u, miSemaphoreWait->getSemaphoreDataDword());
401401
}
402-
{
403-
auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
404-
ASSERT_NE(nullptr, batchBufferStart);
405-
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
406-
EXPECT_FALSE(batchBufferStart->getPredicationEnable());
407-
const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
408-
EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
409-
}
410-
{
411-
auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
412-
parsedOffset += sizeof(StaticPartitioningControlSection);
413-
StaticPartitioningControlSection expectedControlSection = {};
414-
EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
415-
}
402+
416403
EXPECT_EQ(parsedOffset, totalBytesProgrammed);
417404
}
418405

@@ -1162,6 +1149,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
11621149
testArgs.emitSelfCleanup = false;
11631150
testArgs.crossTileAtomicSynchronization = false;
11641151
testArgs.useAtomicsForSelfCleanup = false;
1152+
testArgs.emitPipeControlStall = false;
11651153
testArgs.staticPartitioning = true;
11661154

11671155
checkForProperCmdBufferAddressOffset = false;
@@ -1170,9 +1158,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
11701158
testArgs.workPartitionAllocationGpuVa = 0x8000444000;
11711159
auto walker = createWalker<FamilyType>(postSyncAddress);
11721160

1173-
uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) +
1174-
sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>) +
1175-
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
1161+
uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
11761162

11771163
uint32_t totalBytesProgrammed{};
11781164
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
@@ -1191,27 +1177,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
11911177
ASSERT_NE(nullptr, computeWalker);
11921178
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
11931179
}
1194-
{
1195-
auto pipeControl = genCmdCast<WalkerPartition::PIPE_CONTROL<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
1196-
ASSERT_NE(nullptr, pipeControl);
1197-
parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL<FamilyType>);
1198-
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
1199-
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), pipeControl->getDcFlushEnable());
1200-
}
1201-
{
1202-
auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
1203-
ASSERT_NE(nullptr, batchBufferStart);
1204-
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
1205-
EXPECT_FALSE(batchBufferStart->getPredicationEnable());
1206-
const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
1207-
EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
1208-
}
1209-
{
1210-
auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
1211-
parsedOffset += sizeof(StaticPartitioningControlSection);
1212-
StaticPartitioningControlSection expectedControlSection = {};
1213-
EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
1214-
}
12151180
EXPECT_EQ(parsedOffset, totalBytesProgrammed);
12161181
}
12171182

@@ -1231,8 +1196,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
12311196
auto walker = createWalker<FamilyType>(postSyncAddress);
12321197

12331198
uint64_t expectedControlSectionOffset = sizeof(WalkerPartition::LOAD_REGISTER_MEM<FamilyType>) +
1234-
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>) +
1235-
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
1199+
sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
12361200

12371201
uint32_t totalBytesProgrammed{};
12381202
const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset<FamilyType>(testArgs);
@@ -1260,20 +1224,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe
12601224
ASSERT_NE(nullptr, computeWalker);
12611225
parsedOffset += sizeof(WalkerPartition::COMPUTE_WALKER<FamilyType>);
12621226
}
1263-
{
1264-
auto batchBufferStart = genCmdCast<WalkerPartition::BATCH_BUFFER_START<FamilyType> *>(ptrOffset(cmdBuffer, parsedOffset));
1265-
ASSERT_NE(nullptr, batchBufferStart);
1266-
parsedOffset += sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
1267-
EXPECT_FALSE(batchBufferStart->getPredicationEnable());
1268-
const auto afterControlSectionAddress = cmdBufferGpuAddress + controlSectionOffset + sizeof(StaticPartitioningControlSection);
1269-
EXPECT_EQ(afterControlSectionAddress, batchBufferStart->getBatchBufferStartAddress());
1270-
}
1271-
{
1272-
auto controlSection = reinterpret_cast<StaticPartitioningControlSection *>(ptrOffset(cmdBuffer, parsedOffset));
1273-
parsedOffset += sizeof(StaticPartitioningControlSection);
1274-
StaticPartitioningControlSection expectedControlSection = {};
1275-
EXPECT_EQ(0, std::memcmp(&expectedControlSection, controlSection, sizeof(StaticPartitioningControlSection)));
1276-
}
12771227
EXPECT_EQ(parsedOffset, totalBytesProgrammed);
12781228
}
12791229

opencl/test/unit_test/command_stream/implicit_scaling_ocl_tests.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,3 @@ using namespace NEO;
1414
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSupportEnabled) {
1515
EXPECT_TRUE(ImplicitScaling::apiSupport);
1616
}
17-
18-
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenSemaphoreProgrammingRequiredIsFalse) {
19-
EXPECT_FALSE(ImplicitScaling::semaphoreProgrammingRequired);
20-
}
21-
22-
TEST(ImplicitScalingApiTests, givenOpenClApiUsedThenCrossTileAtomicSynchronization) {
23-
EXPECT_TRUE(ImplicitScaling::crossTileAtomicSynchronization);
24-
}

shared/source/command_container/implicit_scaling.cpp

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
#include "shared/source/command_container/implicit_scaling.h"
99

10+
#include "shared/source/command_container/walker_partition_interface.h"
1011
#include "shared/source/debug_settings/debug_settings_manager.h"
1112
#include "shared/source/os_interface/os_interface.h"
1213

@@ -36,17 +37,17 @@ bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() {
3637
}
3738

3839
bool ImplicitScalingHelper::isSemaphoreProgrammingRequired() {
39-
auto semaphoreProgrammingRequired = ImplicitScaling::semaphoreProgrammingRequired;
40-
int overrideSemaphoreProgrammingRequired = NEO::DebugManager.flags.SynchronizeWithSemaphores.get();
40+
auto semaphoreProgrammingRequired = false;
41+
int overrideSemaphoreProgrammingRequired = DebugManager.flags.SynchronizeWithSemaphores.get();
4142
if (overrideSemaphoreProgrammingRequired != -1) {
4243
semaphoreProgrammingRequired = !!overrideSemaphoreProgrammingRequired;
4344
}
4445
return semaphoreProgrammingRequired;
4546
}
4647

47-
bool ImplicitScalingHelper::isCrossTileAtomicRequired() {
48-
auto crossTileAtomicSynchronization = ImplicitScaling::crossTileAtomicSynchronization;
49-
int overrideCrossTileAtomicSynchronization = NEO::DebugManager.flags.UseCrossAtomicSynchronization.get();
48+
bool ImplicitScalingHelper::isCrossTileAtomicRequired(bool defaultCrossTileRequirement) {
49+
auto crossTileAtomicSynchronization = defaultCrossTileRequirement;
50+
int overrideCrossTileAtomicSynchronization = DebugManager.flags.UseCrossAtomicSynchronization.get();
5051
if (overrideCrossTileAtomicSynchronization != -1) {
5152
crossTileAtomicSynchronization = !!overrideCrossTileAtomicSynchronization;
5253
}
@@ -62,7 +63,12 @@ bool ImplicitScalingHelper::isAtomicsUsedForSelfCleanup() {
6263
return useAtomics;
6364
}
6465

65-
bool ImplicitScalingHelper::isSelfCleanupRequired(bool defaultSelfCleanup) {
66+
bool ImplicitScalingHelper::isSelfCleanupRequired(const WalkerPartition::WalkerPartitionArgs &args, bool apiSelfCleanup) {
67+
bool defaultSelfCleanup = apiSelfCleanup &&
68+
(args.crossTileAtomicSynchronization ||
69+
args.synchronizeBeforeExecution ||
70+
!args.staticPartitioning);
71+
6672
int overrideProgramSelfCleanup = DebugManager.flags.ProgramWalkerPartitionSelfCleanup.get();
6773
if (overrideProgramSelfCleanup != -1) {
6874
defaultSelfCleanup = !!(overrideProgramSelfCleanup);
@@ -79,13 +85,12 @@ bool ImplicitScalingHelper::isWparidRegisterInitializationRequired() {
7985
return initWparidRegister;
8086
}
8187

82-
bool ImplicitScalingHelper::isPipeControlStallRequired() {
83-
bool emitPipeControl = true;
88+
bool ImplicitScalingHelper::isPipeControlStallRequired(bool defaultEmitPipeControl) {
8489
int overrideUsePipeControl = DebugManager.flags.UsePipeControlAfterPartitionedWalker.get();
8590
if (overrideUsePipeControl != -1) {
86-
emitPipeControl = !!(overrideUsePipeControl);
91+
defaultEmitPipeControl = !!(overrideUsePipeControl);
8792
}
88-
return emitPipeControl;
93+
return defaultEmitPipeControl;
8994
}
9095

9196
} // namespace NEO

0 commit comments

Comments
 (0)