Skip to content

Commit 71bef60

Browse files
bmyates authored and Compute-Runtime-Automation committed
Use max enabled slice in debugger thread mapping
Signed-off-by: Yates, Brandon <[email protected]>
1 parent 81845d3 commit 71bef60

File tree

17 files changed

+102
-19
lines changed

17 files changed

+102
-19
lines changed

level_zero/core/source/hw_helpers/l0_hw_helper_base.inl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
#include "shared/source/debug_settings/debug_settings_manager.h"
9+
#include "shared/source/helpers/hw_helper.h"
910

1011
#include "level_zero/core/source/device/device.h"
1112
#include "level_zero/core/source/event/event.h"
@@ -40,8 +41,9 @@ void L0HwHelperHw<GfxFamily>::getAttentionBitmaskForSingleThreads(const std::vec
4041
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
4142
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
4243
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
44+
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
4345

44-
bitmaskSize = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * bytesPerEu;
46+
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
4547
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
4648

4749
memset(bitmask.get(), 0, bitmaskSize);
@@ -63,11 +65,12 @@ std::vector<EuThread::ThreadId> L0HwHelperHw<GfxFamily>::getThreadsFromAttention
6365
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
6466
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
6567
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
68+
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
6669

6770
UNRECOVERABLE_IF(bytesPerEu != 1);
6871
std::vector<EuThread::ThreadId> threads;
6972

70-
for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) {
73+
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
7174
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
7275
for (uint32_t eu = 0; eu < hwInfo.gtSystemInfo.MaxEuPerSubSlice; eu++) {
7376
size_t offset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice + eu * bytesPerEu;

level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
1212
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
1313
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
1414
const uint32_t threadsSizePerSlice = numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
15+
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
1516

1617
const uint32_t eusPerRow = 4;
1718
const uint32_t numberOfRows = 2;
1819

19-
bitmaskSize = hwInfo.gtSystemInfo.MaxSubSlicesSupported * numEuPerSubslice * bytesPerEu;
20+
bitmaskSize = std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported) * numSubslicesPerSlice * numEuPerSubslice * bytesPerEu;
2021
bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
2122

2223
memset(bitmask.get(), 0, bitmaskSize);
@@ -43,11 +44,12 @@ std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBit
4344
const uint32_t threadsSizePerSubSlice = numEuPerSubslice * bytesPerEu;
4445
const uint32_t eusPerRow = 4;
4546
const uint32_t numberOfRows = 2;
47+
const uint32_t highestEnabledSlice = NEO::HwHelper::getHighestEnabledSlice(hwInfo);
4648

4749
UNRECOVERABLE_IF(bytesPerEu != 1);
4850
std::vector<EuThread::ThreadId> threads;
4951

50-
for (uint32_t slice = 0; slice < hwInfo.gtSystemInfo.MaxSlicesSupported; slice++) {
52+
for (uint32_t slice = 0; slice < std::max(highestEnabledSlice, hwInfo.gtSystemInfo.MaxSlicesSupported); slice++) {
5153
for (uint32_t subslice = 0; subslice < numSubslicesPerSlice; subslice++) {
5254

5355
size_t subSliceOffset = slice * threadsSizePerSlice + subslice * threadsSizePerSubSlice;

level_zero/core/test/unit_tests/main.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ int main(int argc, char **argv) {
296296
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
297297
gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount);
298298
gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount);
299-
gtSystemInfo.IsDynamicallyPopulated = false;
300299
// clang-format on
301300

302301
// Platforms with uninitialized factory are not supported

level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*
66
*/
77

8+
#include "shared/source/helpers/aligned_memory.h"
89
#include "shared/source/helpers/ptr_math.h"
910
#include "shared/test/common/helpers/debug_manager_state_restore.h"
1011
#include "shared/test/common/helpers/default_hw_info.h"
@@ -373,9 +374,44 @@ HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettin
373374
}
374375
}
375376

376-
using PlatformsWithFusedEus = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HP_CORE>;
377+
using PlatformsWithFusedEus = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HPG_CORE>;
377378
using L0HwHelperFusedEuTest = ::testing::Test;
378379

380+
// Verifies that the attention bitmask is sized from the highest enabled slice
// when the enabled slices (2 and 3) lie above MaxSlicesSupported (2), and that
// the bitmask round-trips back into thread ids.
HWTEST2_F(L0HwHelperFusedEuTest, givenDynamicallyPopulatesSliceInfoGreaterThanMaxSlicesSupportedThenBitmasksAreCorrect, PlatformsWithFusedEus) {
    // Copy the default hardware info by value so the edits below stay local to this test.
    auto localHwInfo = *NEO::defaultHwInfo.get();
    auto &l0HwHelper = L0::L0HwHelper::get(localHwInfo.platform.eRenderCoreFamily);
    auto &gtSystemInfo = localHwInfo.gtSystemInfo;
    if (gtSystemInfo.MaxEuPerSubSlice <= 8) {
        GTEST_SKIP();
    }

    std::unique_ptr<uint8_t[]> attentionBitmask;
    size_t attentionBitmaskSize = 0;

    // Describe a topology in which only slices 2 and 3 are enabled, although
    // MaxSlicesSupported reports just 2 slices.
    gtSystemInfo.IsDynamicallyPopulated = true;
    gtSystemInfo.MaxSlicesSupported = 2;
    for (int sliceIndex = 0; sliceIndex < GT_MAX_SLICE; ++sliceIndex) {
        gtSystemInfo.SliceInfo[sliceIndex].Enabled = false;
    }
    gtSystemInfo.SliceInfo[2].Enabled = true;
    gtSystemInfo.SliceInfo[3].Enabled = true;

    // One thread with attention on each of the two enabled slices.
    std::vector<EuThread::ThreadId> threadsWithAtt;
    threadsWithAtt.push_back({0, 2, 0, 0, 0});
    threadsWithAtt.push_back({0, 3, 0, 0, 0});
    l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, localHwInfo, attentionBitmask, attentionBitmaskSize);

    // The expected size covers 4 slices (indices 0..3), not MaxSlicesSupported.
    const uint32_t subslicesPerSlice = gtSystemInfo.MaxSubSlicesSupported / gtSystemInfo.MaxSlicesSupported;
    const uint32_t eusPerSubslice = std::min(gtSystemInfo.MaxEuPerSubSlice, 8u);
    const uint32_t threadsPerEu = (gtSystemInfo.ThreadCount / gtSystemInfo.EUCount);
    const uint32_t bytesForEu = alignUp(threadsPerEu, 8) / 8;
    const auto expectedSize = 4 * subslicesPerSlice * eusPerSubslice * bytesForEu;
    EXPECT_EQ(attentionBitmaskSize, expectedSize);

    // NOTE(review): four threads come back for two attention bits, presumably
    // because each bit maps to a fused-EU pair - confirm against the helper.
    auto reportedThreads = l0HwHelper.getThreadsFromAttentionBitmask(localHwInfo, 0, attentionBitmask.get(), attentionBitmaskSize);
    ASSERT_EQ(reportedThreads.size(), 4u);
    EXPECT_EQ(reportedThreads[0], threadsWithAtt[0]);
    EXPECT_EQ(reportedThreads[2], threadsWithAtt[1]);
}
414+
379415
HWTEST2_F(L0HwHelperFusedEuTest, givenBitmaskWithAttentionBitsForSingleThreadWhenGettingThreadsThenThreadForTwoEUsReturned, PlatformsWithFusedEus) {
380416
auto hwInfo = *NEO::defaultHwInfo.get();
381417
if (hwInfo.gtSystemInfo.MaxEuPerSubSlice <= 8) {

opencl/test/unit_test/helpers/hw_helper_tests.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,3 +1516,27 @@ HWTEST2_F(HwHelperTest, givenAtLeastXeHpPlatformWhenGettingMinimalScratchSpaceSi
15161516
const auto &hwHelper = HwHelper::get(renderCoreFamily);
15171517
EXPECT_EQ(64U, hwHelper.getMinimalScratchSpaceSize());
15181518
}
1519+
1520+
// With IsDynamicallyPopulated cleared, getHighestEnabledSlice is expected to
// report MaxSlicesSupported.
TEST(HwHelperTests, whenIsDynamicallyPopulatedisFalseThengetHighestEnabledSliceReturnsMaxSlicesSupported) {
    // Copy the default hardware info by value so the edits stay local to this test.
    auto localHwInfo = *defaultHwInfo;
    auto &gtSystemInfo = localHwInfo.gtSystemInfo;

    gtSystemInfo.IsDynamicallyPopulated = false;
    gtSystemInfo.MaxSlicesSupported = 4;

    const auto &helper = HwHelper::get(localHwInfo.platform.eRenderCoreFamily);
    const auto reportedSlice = helper.getHighestEnabledSlice(localHwInfo);
    EXPECT_EQ(reportedSlice, gtSystemInfo.MaxSlicesSupported);
}
1529+
1530+
// With IsDynamicallyPopulated set and only slice index 6 enabled,
// getHighestEnabledSlice is expected to report 7 (index + 1), even though
// MaxSlicesSupported is 4.
TEST(HwHelperTests, whenIsDynamicallyPopulatedisTrueThengetHighestEnabledSliceReturnsHighestEnabledSliceInfo) {
    // Copy the default hardware info by value so the edits stay local to this test.
    auto localHwInfo = *defaultHwInfo;
    auto &gtSystemInfo = localHwInfo.gtSystemInfo;

    gtSystemInfo.IsDynamicallyPopulated = true;
    gtSystemInfo.MaxSlicesSupported = 4;

    // Disable every slice first, then enable a single one.
    for (int sliceIndex = 0; sliceIndex < GT_MAX_SLICE; ++sliceIndex) {
        gtSystemInfo.SliceInfo[sliceIndex].Enabled = false;
    }
    gtSystemInfo.SliceInfo[6].Enabled = true;

    const auto &helper = HwHelper::get(localHwInfo.platform.eRenderCoreFamily);
    const auto reportedSlice = helper.getHighestEnabledSlice(localHwInfo);
    EXPECT_EQ(reportedSlice, 7u);
}

opencl/test/unit_test/main.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ int main(int argc, char **argv) {
299299
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
300300
gtSystemInfo.MaxSlicesSupported = std::max(gtSystemInfo.MaxSlicesSupported, gtSystemInfo.SliceCount);
301301
gtSystemInfo.MaxSubSlicesSupported = std::max(gtSystemInfo.MaxSubSlicesSupported, gtSystemInfo.SubSliceCount);
302-
gtSystemInfo.IsDynamicallyPopulated = false;
303302
// clang-format on
304303

305304
binaryNameSuffix.append(familyName[hwInfoForTests.platform.eRenderCoreFamily]);

shared/source/helpers/hw_helper.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,18 @@ uint32_t HwHelper::getSubDevicesCount(const HardwareInfo *pHwInfo) {
6666
}
6767
}
6868

69+
uint32_t HwHelper::getHighestEnabledSlice(const HardwareInfo &hwInfo) {
70+
uint32_t highestEnabledSlice = 0;
71+
if (!hwInfo.gtSystemInfo.IsDynamicallyPopulated) {
72+
return hwInfo.gtSystemInfo.MaxSlicesSupported;
73+
}
74+
for (int highestSlice = GT_MAX_SLICE - 1; highestSlice >= 0; highestSlice--) {
75+
if (hwInfo.gtSystemInfo.SliceInfo[highestSlice].Enabled) {
76+
highestEnabledSlice = highestSlice + 1;
77+
break;
78+
}
79+
}
80+
return highestEnabledSlice;
81+
}
82+
6983
} // namespace NEO

shared/source/helpers/hw_helper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class HwHelper {
6262
static bool compressedBuffersSupported(const HardwareInfo &hwInfo);
6363
static bool compressedImagesSupported(const HardwareInfo &hwInfo);
6464
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
65+
static uint32_t getHighestEnabledSlice(const HardwareInfo &hwInfo);
6566
virtual bool timestampPacketWriteSupported() const = 0;
6667
virtual bool isTimestampWaitSupportedForQueues() const = 0;
6768
virtual bool isTimestampWaitSupportedForEvents(const HardwareInfo &hwInfo) const = 0;

shared/source/helpers/hw_info.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ void setHwInfoValuesFromConfig(const uint64_t hwInfoConfig, HardwareInfo &hwInfo
7575
hwInfoIn.gtSystemInfo.SubSliceCount = subSlicePerSliceCount * sliceCount;
7676
hwInfoIn.gtSystemInfo.DualSubSliceCount = subSlicePerSliceCount * sliceCount;
7777
hwInfoIn.gtSystemInfo.EUCount = euPerSubSliceCount * subSlicePerSliceCount * sliceCount;
78+
hwInfoIn.gtSystemInfo.IsDynamicallyPopulated = true;
7879
for (uint32_t slice = 0; slice < hwInfoIn.gtSystemInfo.SliceCount; slice++) {
7980
hwInfoIn.gtSystemInfo.SliceInfo[slice].Enabled = true;
8081
}

shared/source/os_interface/linux/hw_info_config_drm.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ int HwInfoConfig::configureHwInfoDrm(const HardwareInfo *inHwInfo, HardwareInfo
110110
gtSystemInfo->MaxSubSlicesSupported = std::max(static_cast<uint32_t>(topologyData.maxSubSliceCount * topologyData.maxSliceCount), gtSystemInfo->MaxSubSlicesSupported);
111111
gtSystemInfo->MaxSlicesSupported = topologyData.maxSliceCount;
112112

113+
gtSystemInfo->IsDynamicallyPopulated = true;
113114
for (uint32_t slice = 0; slice < gtSystemInfo->SliceCount; slice++) {
114115
gtSystemInfo->SliceInfo[slice].Enabled = true;
115116
}

0 commit comments

Comments
 (0)