Skip to content

Commit 18adbed

Browse files
feat(zebin): add thread scheduling mode support
Resolves: NEO-7197
Signed-off-by: Krystian Chmielewski <[email protected]>
1 parent 01b3249 commit 18adbed

File tree

22 files changed

+149
-52
lines changed

22 files changed

+149
-52
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2285,7 +2285,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
22852285
if (!containsAnyKernel) {
22862286
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
22872287
finalStreamState = requiredStreamState;
2288-
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), device->getDevicePreemptionMode(), hwInfo);
2288+
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
22892289
containsAnyKernel = true;
22902290
}
22912291

@@ -2298,7 +2298,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
22982298
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
22992299
}
23002300

2301-
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernel.getSchedulingHintExp(), device->getDevicePreemptionMode(), hwInfo);
2301+
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
23022302

23032303
if (finalStreamState.stateComputeMode.isDirty() && !getLogicalStateHelper()) {
23042304
bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute);

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,18 +1066,14 @@ NEO::GraphicsAllocation *KernelImp::getIsaAllocation() const {
10661066
}
10671067

10681068
ze_result_t KernelImp::setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) {
1069-
1069+
auto &threadArbitrationPolicy = const_cast<NEO::ThreadArbitrationPolicy &>(getKernelDescriptor().kernelAttributes.threadArbitrationPolicy);
10701070
if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST) {
1071-
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::AgeBased;
1071+
threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
10721072
} else if (pHint->flags == ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN) {
1073-
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobin;
1073+
threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
10741074
} else {
1075-
this->schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
1075+
threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
10761076
}
10771077
return ZE_RESULT_SUCCESS;
1078-
}
1079-
1080-
int32_t KernelImp::getSchedulingHintExp() const {
1081-
return this->schedulingHintExpFlag;
1082-
}
1078+
}
10831079
} // namespace L0

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ struct KernelImp : Kernel {
160160
}
161161

162162
ze_result_t setSchedulingHintExp(ze_scheduling_hint_exp_desc_t *pHint) override;
163-
int32_t getSchedulingHintExp() const override;
164163

165164
NEO::ImplicitArgs *getImplicitArgs() const override { return pImplicitArgs.get(); }
166165

@@ -222,7 +221,6 @@ struct KernelImp : Kernel {
222221

223222
bool kernelHasIndirectAccess = true;
224223

225-
int32_t schedulingHintExpFlag = NEO::ThreadArbitrationPolicy::NotPresent;
226224
std::unique_ptr<NEO::ImplicitArgs> pImplicitArgs;
227225

228226
std::unique_ptr<KernelExt> pExtension;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,23 +66,23 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithOldestFirstThreadArbitrat
6666
ze_scheduling_hint_exp_desc_t pHint{};
6767
pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_OLDEST_FIRST;
6868
kernel->setSchedulingHintExp(&pHint);
69-
ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::AgeBased);
69+
ASSERT_EQ(kernel->getKernelDescriptor().kernelAttributes.threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::AgeBased);
7070
}
7171

7272
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
7373
createKernel();
7474
ze_scheduling_hint_exp_desc_t pHint{};
7575
pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_ROUND_ROBIN;
7676
kernel->setSchedulingHintExp(&pHint);
77-
ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobin);
77+
ASSERT_EQ(kernel->getKernelDescriptor().kernelAttributes.threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobin);
7878
}
7979

8080
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithStallRRThreadArbitrationPolicySetUsingSchedulingHintExtensionThenCorrectInternalPolicyIsReturned) {
8181
createKernel();
8282
ze_scheduling_hint_exp_desc_t pHint{};
8383
pHint.flags = ZE_SCHEDULING_HINT_EXP_FLAG_STALL_BASED_ROUND_ROBIN;
8484
kernel->setSchedulingHintExp(&pHint);
85-
ASSERT_EQ(kernel->getSchedulingHintExp(), NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency);
85+
ASSERT_EQ(kernel->getKernelDescriptor().kernelAttributes.threadArbitrationPolicy, NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency);
8686
}
8787

8888
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithThreadArbitrationPolicySetUsingSchedulingHintExtensionTheSameFlagIsUsedToSetCmdListThreadArbitrationPolicy) {

opencl/source/command_queue/enqueue_common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
768768
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), // preemptionMode
769769
numGrfRequired, // numGrfRequired
770770
L3CachingSettings::l3CacheOn, // l3CacheSettings
771-
kernel->getThreadArbitrationPolicy(), // threadArbitrationPolicy
771+
kernel->getDescriptor().kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
772772
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
773773
kernel->getExecutionType(), // kernelExecutionType
774774
memoryCompressionState, // memoryCompressionState

opencl/source/helpers/hardware_commands_helper_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
144144
sizeCrossThreadData, sizePerThreadData, hardwareInfo);
145145
auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily);
146146
hwInfoConfig.updateIddCommand(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired,
147-
kernel.getThreadArbitrationPolicy());
147+
kernelDescriptor.kernelAttributes.threadArbitrationPolicy);
148148

149149
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup,
150150
slmTotalSize, SlmPolicy::SlmPolicyNone);

opencl/source/helpers/task_information.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
188188
preemptionMode, // preemptionMode
189189
kernelDescriptor.kernelAttributes.numGrfRequired, // numGrfRequired
190190
L3CachingSettings::l3CacheOn, // l3CacheSettings
191-
kernel->getThreadArbitrationPolicy(), // threadArbitrationPolicy
191+
kernelDescriptor.kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
192192
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
193193
kernel->getExecutionType(), // kernelExecutionType
194194
memoryCompressionState, // memoryCompressionState

opencl/source/kernel/kernel.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,12 @@ cl_int Kernel::initialize() {
232232
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, areMultipleSubDevicesInContext());
233233
}
234234

235-
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
235+
auto &threadArbitrationPolicy = const_cast<ThreadArbitrationPolicy &>(kernelInfo.kernelDescriptor.kernelAttributes.threadArbitrationPolicy);
236+
if (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) {
237+
threadArbitrationPolicy = static_cast<ThreadArbitrationPolicy>(hwHelper.getDefaultThreadArbitrationPolicy());
238+
}
236239
if (false == kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress) {
237-
setThreadArbitrationPolicy(ThreadArbitrationPolicy::AgeBased);
240+
threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
238241
}
239242

240243
auto &clHwHelper = ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
@@ -2230,18 +2233,19 @@ void Kernel::updateAuxTranslationRequired() {
22302233
int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
22312234
auto &hwInfo = clDevice.getHardwareInfo();
22322235
auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
2236+
auto &threadArbitrationPolicy = const_cast<ThreadArbitrationPolicy &>(getDescriptor().kernelAttributes.threadArbitrationPolicy);
22332237
if (!hwHelper.isSupportedKernelThreadArbitrationPolicy()) {
2234-
this->threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
2238+
threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
22352239
return CL_INVALID_DEVICE;
22362240
} else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_ROUND_ROBIN_INTEL) {
2237-
this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
2241+
threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobin;
22382242
} else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_OLDEST_FIRST_INTEL) {
2239-
this->threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
2243+
threadArbitrationPolicy = ThreadArbitrationPolicy::AgeBased;
22402244
} else if (policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_AFTER_DEPENDENCY_ROUND_ROBIN_INTEL ||
22412245
policy == CL_KERNEL_EXEC_INFO_THREAD_ARBITRATION_POLICY_STALL_BASED_ROUND_ROBIN_INTEL) {
2242-
this->threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency;
2246+
threadArbitrationPolicy = ThreadArbitrationPolicy::RoundRobinAfterDependency;
22432247
} else {
2244-
this->threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
2248+
threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
22452249
return CL_INVALID_VALUE;
22462250
}
22472251
return CL_SUCCESS;

opencl/source/kernel/kernel.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -310,9 +310,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
310310

311311
bool isBuiltIn = false;
312312

313-
int32_t getThreadArbitrationPolicy() const {
314-
return threadArbitrationPolicy;
315-
}
316313
KernelExecutionType getExecutionType() const {
317314
return executionType;
318315
}
@@ -353,9 +350,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
353350
bool areStatelessWritesUsed() { return containsStatelessWrites; }
354351
int setKernelThreadArbitrationPolicy(uint32_t propertyValue);
355352
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType);
356-
void setThreadArbitrationPolicy(int32_t policy) {
357-
this->threadArbitrationPolicy = policy;
358-
}
359353
void getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *globalWorkSize, const size_t *globalWorkOffset,
360354
size_t *localWorkSize);
361355
uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const;
@@ -521,8 +515,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
521515
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
522516
KernelExecutionType executionType = KernelExecutionType::Default;
523517

524-
int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
525-
526518
uint32_t patchedArgumentsNum = 0;
527519
uint32_t startOffset = 0;
528520
uint32_t statelessUncacheableArgsCount = 0;

opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,7 @@ HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhe
318318
&newThreadArbitrationPolicy // const void *param_value
319319
);
320320
EXPECT_EQ(CL_SUCCESS, retVal);
321-
EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->threadArbitrationPolicy);
322-
EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->getThreadArbitrationPolicy());
321+
EXPECT_EQ(getNewKernelArbitrationPolicy(newThreadArbitrationPolicy), pMockKernel->getDescriptor().kernelAttributes.threadArbitrationPolicy);
323322
}
324323

325324
HWTEST_F(clSetKernelExecInfoTests, givenKernelExecInfoThreadArbitrationPolicyWhenNotSupportedAndSettingAdditionalKernelInfoThenClInvalidDeviceIsReturned) {

0 commit comments

Comments
 (0)