Skip to content

Commit dd01cff

Browse files
Unify logic determining thread arbitration policy value
Related-To: NEO-6728
Signed-off-by: Filip Hazubski <[email protected]>
1 parent d5fedf9 commit dd01cff

27 files changed

+109 -89 lines changed

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -188,10 +188,6 @@ struct CommandList : _ze_command_list_handle_t {
188188
return commandListPreemptionMode;
189189
}
190190

191-
int32_t getThreadArbitrationPolicy() const {
192-
return threadArbitrationPolicy;
193-
}
194-
195191
UnifiedMemoryControls getUnifiedMemoryControls() const {
196192
return unifiedMemoryControls;
197193
}
@@ -256,7 +252,6 @@ struct CommandList : _ze_command_list_handle_t {
256252
uint32_t cmdListType = CommandListType::TYPE_REGULAR;
257253
uint32_t commandListPerThreadScratchSize = 0u;
258254
uint32_t commandListPerThreadPrivateScratchSize = 0u;
259-
int32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent;
260255
uint32_t partitionCount = 1;
261256
bool isFlushTaskSubmissionEnabled = false;
262257
bool isSyncModeQueue = false;

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
7474
void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) {
7575
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
7676
auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);
77-
threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
77+
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
7878
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
7979
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
8080
}
@@ -2240,6 +2240,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
22402240
if (!containsAnyKernel) {
22412241
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
22422242
finalStreamState = requiredStreamState;
2243+
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo);
22432244
containsAnyKernel = true;
22442245
}
22452246

@@ -2252,10 +2253,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
22522253
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
22532254
}
22542255

2255-
auto &neoDevice = *device->getNEODevice();
2256-
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
2256+
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), hwInfo);
22572257

22582258
if (finalStreamState.stateComputeMode.isDirty()) {
2259+
auto &neoDevice = *device->getNEODevice();
22592260
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true);
22602261
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommand(*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, hwInfo);
22612262
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false);

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
112112

113113
NEO::Device *neoDevice = device->getNEODevice();
114114

115-
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
116-
117115
if (NEO::DebugManager.flags.EnableSWTags.get()) {
118116
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::KernelNameTag>(
119117
*commandContainer.getCommandStream(),

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -27,35 +27,35 @@ template <GFXCORE_FAMILY gfxCoreFamily>
2727
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration) {
2828

2929
NEO::DispatchFlags dispatchFlags(
30-
{}, //csrDependencies
31-
nullptr, //barrierTimestampPacketNodes
32-
{}, //pipelineSelectArgs
33-
nullptr, //flushStampReference
34-
NEO::QueueThrottle::MEDIUM, //throttle
35-
this->getCommandListPreemptionMode(), //preemptionMode
36-
this->commandContainer.lastSentNumGrfRequired, //numGrfRequired
37-
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings
38-
this->getThreadArbitrationPolicy(), //threadArbitrationPolicy
39-
NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
40-
NEO::KernelExecutionType::NotApplicable, //kernelExecutionType
41-
NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState
42-
NEO::QueueSliceCount::defaultSliceCount, //sliceCount
43-
this->isSyncModeQueue, //blocking
44-
this->isSyncModeQueue, //dcFlush
45-
this->getCommandListSLMEnable(), //useSLM
46-
this->isSyncModeQueue, //guardCommandBufferWithPipeControl
47-
false, //GSBA32BitRequired
48-
false, //requiresCoherency
49-
false, //lowPriority
50-
true, //implicitFlush
51-
this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
52-
false, //epilogueRequired
53-
false, //usePerDssBackedBuffer
54-
false, //useSingleSubdevice
55-
false, //useGlobalAtomics
56-
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
57-
false, //memoryMigrationRequired
58-
false //textureCacheFlush
30+
{}, //csrDependencies
31+
nullptr, //barrierTimestampPacketNodes
32+
{}, //pipelineSelectArgs
33+
nullptr, //flushStampReference
34+
NEO::QueueThrottle::MEDIUM, //throttle
35+
this->getCommandListPreemptionMode(), //preemptionMode
36+
this->commandContainer.lastSentNumGrfRequired, //numGrfRequired
37+
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings
38+
this->requiredStreamState.stateComputeMode.threadArbitrationPolicy.value, //threadArbitrationPolicy
39+
NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
40+
NEO::KernelExecutionType::NotApplicable, //kernelExecutionType
41+
NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState
42+
NEO::QueueSliceCount::defaultSliceCount, //sliceCount
43+
this->isSyncModeQueue, //blocking
44+
this->isSyncModeQueue, //dcFlush
45+
this->getCommandListSLMEnable(), //useSLM
46+
this->isSyncModeQueue, //guardCommandBufferWithPipeControl
47+
false, //GSBA32BitRequired
48+
false, //requiresCoherency
49+
false, //lowPriority
50+
true, //implicitFlush
51+
this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
52+
false, //epilogueRequired
53+
false, //usePerDssBackedBuffer
54+
false, //useSingleSubdevice
55+
false, //useGlobalAtomics
56+
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
57+
false, //memoryMigrationRequired
58+
false //textureCacheFlush
5959
);
6060

6161
this->commandContainer.removeDuplicatesFromResidencyContainer();

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -210,14 +210,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
210210
}
211211
}
212212

213-
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
214-
if (kernelImp->getSchedulingHintExp() != NEO::ThreadArbitrationPolicy::NotPresent) {
215-
this->threadArbitrationPolicy = kernelImp->getSchedulingHintExp();
216-
}
217-
218213
auto isMultiOsContextCapable = (this->partitionCount > 1) && !isCooperative;
219214
updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative);
220215

216+
KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
221217
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
222218
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
223219

level_zero/core/source/kernel/kernel.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
151151
virtual NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() = 0;
152152

153153
virtual ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) = 0;
154+
virtual int32_t getSchedulingHintExp() = 0;
154155

155156
Kernel() = default;
156157
Kernel(const Kernel &) = delete;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1014,9 +1014,6 @@ ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint
10141014
}
10151015

10161016
int32_t KernelImp::getSchedulingHintExp() {
1017-
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
1018-
this->schedulingHintExpFlag = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
1019-
}
10201017
return this->schedulingHintExpFlag;
10211018
}
10221019
} // namespace L0

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ struct KernelImp : Kernel {
152152
}
153153

154154
ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override;
155-
int32_t getSchedulingHintExp();
155+
int32_t getSchedulingHintExp() override;
156156

157157
NEO::ImplicitArgs *getImplicitArgs() const override { return pImplicitArgs.get(); }
158158

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
9494
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
9595

9696
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
97-
ASSERT_EQ(commandList->threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobin);
97+
ASSERT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
9898
delete (pHint);
9999
}
100100

@@ -114,7 +114,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySe
114114
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
115115

116116
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
117-
ASSERT_EQ(0, commandList->threadArbitrationPolicy);
117+
ASSERT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, commandList->getFinalStreamState().stateComputeMode.threadArbitrationPolicy.value);
118118
delete (pHint);
119119
}
120120

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1234,6 +1234,37 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernel
12341234
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
12351235
}
12361236

1237+
HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsAtLeastSkl) {
1238+
DebugManagerStateRestore restorer;
1239+
1240+
auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
1241+
auto defaultThreadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
1242+
auto nonDefaultThreadArbitrationPolicy = defaultThreadArbitrationPolicy + 1;
1243+
1244+
Mock<::L0::Kernel> kernel;
1245+
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
1246+
kernel.module = pMockModule.get();
1247+
1248+
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
1249+
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
1250+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
1251+
1252+
// initial kernel with no policy preference
1253+
pCommandList->updateStreamProperties(kernel, false, false);
1254+
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
1255+
1256+
// policy changed to non-default state
1257+
pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value = nonDefaultThreadArbitrationPolicy;
1258+
// another kernel with no policy preference - do not update policy
1259+
pCommandList->updateStreamProperties(kernel, false, false);
1260+
EXPECT_EQ(nonDefaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
1261+
1262+
// another kernel with no policy preference, this time with debug toggle set - update policy back to default value
1263+
DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true);
1264+
pCommandList->updateStreamProperties(kernel, false, false);
1265+
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
1266+
}
1267+
12371268
struct MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture : public MultiDeviceModuleFixture {
12381269
void SetUp() {
12391270
DebugManager.flags.EnableImplicitScaling.set(1);

0 commit comments

Comments (0)