Skip to content

Commit ee418ef

Browse files
Per-thread scratch offset calculation
Related-To: NEO-6404 Signed-off-by: Mateusz Hoppe <[email protected]>
1 parent 125946e commit ee418ef

File tree

7 files changed

+55
-6
lines changed

7 files changed

+55
-6
lines changed

level_zero/tools/source/debug/debug_session.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,14 @@ bool DebugSession::isBindlessSystemRoutine() {
203203
return false;
204204
}
205205

206+
size_t DebugSession::getPerThreadScratchOffset(size_t ptss, EuThread::ThreadId threadId) {
207+
auto hwInfo = connectedDevice->getHwInfo();
208+
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
209+
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
210+
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
211+
212+
auto threadOffset = (((threadId.slice * numSubslicesPerSlice + threadId.subslice) * numEuPerSubslice + threadId.eu) * numThreadsPerEu + threadId.thread) * ptss;
213+
return threadOffset;
214+
}
215+
206216
} // namespace L0

level_zero/tools/source/debug/debug_session.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ struct DebugSession : _zet_debug_session_handle_t {
8787

8888
std::vector<EuThread::ThreadId> getSingleThreadsForDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread, const NEO::HardwareInfo &hwInfo);
8989

90+
size_t getPerThreadScratchOffset(size_t ptss, EuThread::ThreadId threadId);
91+
9092
DebugAreaHeader debugArea;
9193

9294
Device *connectedDevice = nullptr;

level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,29 @@ TEST(DebugSession, givenDifferentCombinationsOfThreadsAndMemoryTypeCheckExpected
430430
EXPECT_EQ(ZE_RESULT_SUCCESS, retVal);
431431
}
432432

433+
// Exercises getPerThreadScratchOffset with three thread ids and checks each result
// against an offset derived from the default hardware info topology.
TEST(DebugSession, givenDifferentThreadsWhenGettingPerThreadScratchOffsetThenCorrectOffsetReturned) {
    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::Device *device = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
    Mock<L0::DeviceImp> l0Device(device, device->getExecutionEnvironment());
    auto session = std::make_unique<DebugSessionMock>(zet_debug_config_t{0x1234}, &l0Device);

    const uint32_t ptss = 128;
    const uint32_t threadsPerEu = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;

    // Very first thread: nothing precedes it, so the offset is zero.
    EuThread::ThreadId firstThread = {0, 0, 0, 0, 0};
    auto offset = session->getPerThreadScratchOffset(ptss, firstThread);
    EXPECT_EQ(0u, offset);

    // Thread 0 of EU 1: preceded by every thread of EU 0.
    EuThread::ThreadId firstThreadOfEu1 = {0, 0, 0, 1, 0};
    const auto expectedEu1Offset = ptss * threadsPerEu;
    offset = session->getPerThreadScratchOffset(ptss, firstThreadOfEu1);
    EXPECT_EQ(expectedEu1Offset, offset);

    // Thread 2 of subslice 1: preceded by all threads of subslice 0 plus two threads.
    EuThread::ThreadId thirdThreadOfSubslice1 = {0, 0, 1, 0, 2};
    const auto expectedSubslice1Offset = 2 * ptss + ptss * hwInfo.gtSystemInfo.MaxEuPerSubSlice * threadsPerEu;
    offset = session->getPerThreadScratchOffset(ptss, thirdThreadOfSubslice1);
    EXPECT_EQ(expectedSubslice1Offset, offset);
}
455+
433456
using DebugSessionMultiTile = Test<MultipleDevicesWithCustomHwInfo>;
434457

435458
TEST_F(DebugSessionMultiTile, givenApiThreadAndMultipleTilesWhenConvertingToPhysicalThenCorrectValueReturned) {

level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct DebugSessionMock : public L0::DebugSession {
2727
using L0::DebugSession::allThreads;
2828
using L0::DebugSession::debugArea;
2929
using L0::DebugSession::fillDevicesFromThread;
30+
using L0::DebugSession::getPerThreadScratchOffset;
3031
using L0::DebugSession::getSingleThreadsForDevice;
3132
using L0::DebugSession::isBindlessSystemRoutine;
3233

opencl/test/unit_test/helpers/hw_helper_tests.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,16 @@ HWTEST_F(HwHelperTest, givenGetRenderSurfaceStateBaseAddressCalledThenCorrectVal
13211321
EXPECT_EQ(expectedBaseAddress, hwHelper.getRenderSurfaceStateBaseAddress(&renderSurfaceState));
13221322
}
13231323

1324+
// Programs a pitch into a RENDER_SURFACE_STATE and verifies that
// HwHelper::getRenderSurfaceStatePitch returns that same value.
HWTEST_F(HwHelperTest, givenGetRenderSurfaceStatePitchCalledThenCorrectValueIsReturned) {
    using SurfaceState = typename FamilyType::RENDER_SURFACE_STATE;
    const uint32_t pitch = 0x400;

    SurfaceState surfaceState;
    surfaceState.setSurfacePitch(pitch);

    const auto &helper = HwHelper::get(renderCoreFamily);
    const auto reportedPitch = helper.getRenderSurfaceStatePitch(&surfaceState);
    EXPECT_EQ(pitch, reportedPitch);
}
1333+
13241334
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenCLImageFormatsWhenCallingIsFormatRedescribableThenCorrectValueReturned) {
13251335
static const cl_image_format redescribeFormats[] = {
13261336
{CL_R, CL_UNSIGNED_INT8},

shared/source/helpers/hw_helper.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ class HwHelper {
146146

147147
virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
148148
virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
149+
virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
149150
virtual size_t getMax3dImageWidthOrHeight() const = 0;
150151
virtual uint64_t getMaxMemAllocSize() const = 0;
151152
virtual bool isStatelesToStatefullWithOffsetSupported() const = 0;
@@ -202,6 +203,14 @@ class HwHelperHw : public HwHelper {
202203
return messageExtDescriptor.getBindlessSurfaceOffsetToPatch();
203204
}
204205

206+
uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const override {
207+
return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfaceBaseAddress();
208+
}
209+
210+
uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const override {
211+
return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfacePitch();
212+
}
213+
205214
const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
206215

207216
size_t getMaxBarrierRegisterPerSlice() const override;
@@ -369,7 +378,6 @@ class HwHelperHw : public HwHelper {
369378
void adjustPreemptionSurfaceSize(size_t &csrSize) const override;
370379

371380
bool isScratchSpaceSurfaceStateAccessible() const override;
372-
uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const override;
373381

374382
size_t getMax3dImageWidthOrHeight() const override;
375383
uint64_t getMaxMemAllocSize() const override;

shared/source/helpers/hw_helper_base.inl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -624,11 +624,6 @@ template <typename GfxFamily>
624624
void HwHelperHw<GfxFamily>::adjustPreemptionSurfaceSize(size_t &csrSize) const {
625625
}
626626

627-
template <typename GfxFamily>
628-
uint64_t HwHelperHw<GfxFamily>::getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const {
629-
return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfaceBaseAddress();
630-
}
631-
632627
template <typename GfxFamily>
633628
void HwHelperHw<GfxFamily>::encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) {
634629
EncodeSurfaceState<GfxFamily>::encodeBuffer(args);

0 commit comments

Comments
 (0)