Skip to content

Commit 6ffbf55

Browse files
Add new function to set max threads for workgroup
Resolves: NEO-4116
Change-Id: I8b9faf582c42edcb6f616a2f4662200d0d5b73d3
Signed-off-by: Katarzyna Cencelewska <[email protected]>
1 parent 4b2b1ac commit 6ffbf55

File tree

5 files changed

+18
-7
lines changed

5 files changed

+18
-7
lines changed

core/helpers/hw_helper.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2020 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -45,4 +45,8 @@ uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
4545
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
4646
}
4747

48+
// Returns maxNumEUsPerSubSlice multiplied by the per-EU thread count, where the
// per-EU thread count is computed from hwInfo.gtSystemInfo as ThreadCount / EUCount.
// NOTE(review): EUCount is used as a divisor without a zero check in this function —
// confirm that every caller passes a HardwareInfo with a non-zero EUCount.
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
49+
// Quotient is stored as uint32_t; presumably both fields are integral, so any remainder is dropped — TODO confirm.
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
50+
return maxNumEUsPerSubSlice * numThreadsPerEU;
51+
}
4852
} // namespace NEO

core/helpers/hw_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class HwHelper {
6262
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
6363
virtual std::string getExtensions() const = 0;
6464
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
65+
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
6566
virtual uint32_t getMetricsLibraryGenId() const = 0;
6667
virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
6768
virtual bool requiresAuxResolves() const = 0;
@@ -119,6 +120,8 @@ class HwHelperHw : public HwHelper {
119120

120121
size_t getMaxBarrierRegisterPerSlice() const override;
121122

123+
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
124+
122125
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
123126

124127
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;

core/helpers/hw_helper_base.inl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,4 +277,8 @@ bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo
277277
return false;
278278
}
279279

280+
// Generic HwHelperHw<GfxFamily> definition of getMaxThreadsForWorkgroup.
// It adds no family-specific logic: both arguments are forwarded unchanged to the
// HwHelper implementation and its result is returned as-is.
template <typename GfxFamily>
281+
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
282+
// The HwHelper:: qualification selects the base-class function explicitly instead of
// dispatching virtually, so this call cannot recurse back into this override.
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
283+
}
280284
} // namespace NEO

runtime/device/device_caps.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -277,16 +277,16 @@ void Device::initializeCaps() {
277277
deviceInfo.maxNumEUsPerSubSlice = 0;
278278
deviceInfo.maxSliceCount = systemInfo.SliceCount;
279279
deviceInfo.numThreadsPerEU = 0;
280-
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8;
280+
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32u : 8u;
281281

282282
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
283283
? (systemInfo.EUCount / systemInfo.SubSliceCount)
284284
: systemInfo.EuCountPerPoolMin;
285285
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
286-
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
286+
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
287287

288-
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
289-
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), 1024u);
288+
maxWS = Math::prevPowerOfTwo(maxWS);
289+
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
290290

291291
// calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
292292
deviceInfo.maxNumOfSubGroups = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize / simdSizeUsed);

unit_tests/device/device_caps_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@ TEST(DeviceGetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreE
851851
EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize);
852852
}
853853

854-
TEST(DeviceGetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
854+
HWTEST_F(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
855855
HardwareInfo myHwInfo = *platformDevices[0];
856856
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
857857
FeatureTable &mySkuTable = myHwInfo.featureTable;
@@ -884,7 +884,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap
884884
EXPECT_EQ(device->getDeviceInfo().maxWorkGroupSize / 32, device->getDeviceInfo().maxNumOfSubGroups);
885885
}
886886

887-
TEST(DeviceGetCaps, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
887+
HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
888888
HardwareInfo myHwInfo = *platformDevices[0];
889889
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
890890

0 commit comments

Comments
 (0)