Skip to content

Commit da5f7b9

Browse files
Add debug keys to scratch register write after walker
Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent 35064c3 commit da5f7b9

File tree

6 files changed

+116
-2
lines changed

6 files changed

+116
-2
lines changed

opencl/source/command_queue/gpgpu_walker_base.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
216216
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2;
217217
}
218218

219+
if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
220+
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
221+
}
219222
return expectedSizeCS;
220223
}
221224

opencl/source/command_queue/hardware_interface_base.inl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
160160
HardwareCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand(commandStream, commandQueue, mainKernel, postSyncAddress);
161161
}
162162

163+
if (PauseOnGpuProperties::GpuScratchRegWriteAllowed(DebugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
164+
uint32_t registerOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get();
165+
uint32_t registerData = DebugManager.flags.GpuScratchRegWriteRegisterData.get();
166+
LriHelper<GfxFamily>::program(commandStream, registerOffset, registerData, EncodeSetMMIO<GfxFamily>::isRemapApplicable(registerOffset));
167+
}
168+
163169
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnEnqueue.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount(), PauseOnGpuProperties::PauseMode::AfterWorkload)) {
164170
dispatchDebugPauseCommands(commandStream, commandQueue, DebugPauseState::waitingForUserEndConfirmation, DebugPauseState::hasUserEndConfirmation);
165171
}
@@ -311,5 +317,4 @@ inline void HardwareInterface<GfxFamily>::dispatchDebugPauseCommands(
311317
}
312318
}
313319
}
314-
315320
} // namespace NEO

opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,21 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14181418
return false;
14191419
}
14201420

1421+
template <typename FamilyType>
1422+
bool verifyLoadRegImm(const GenCmdList::iterator &iterator) {
1423+
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
1424+
uint32_t expectedRegisterOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get();
1425+
uint32_t expectedRegisterData = DebugManager.flags.GpuScratchRegWriteRegisterData.get();
1426+
auto loadRegImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*iterator);
1427+
1428+
if ((expectedRegisterOffset == loadRegImm->getRegisterOffset()) &&
1429+
(expectedRegisterData == loadRegImm->getDataDword())) {
1430+
return true;
1431+
}
1432+
1433+
return false;
1434+
}
1435+
14211436
template <typename MI_SEMAPHORE_WAIT>
14221437
void findSemaphores(GenCmdList &cmdList) {
14231438
auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
@@ -1452,6 +1467,20 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14521467
}
14531468
}
14541469

1470+
template <typename FamilyType>
1471+
void findLoadRegImms(GenCmdList &cmdList) {
1472+
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
1473+
auto loadRegImm = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
1474+
1475+
while (loadRegImm != cmdList.end()) {
1476+
if (verifyLoadRegImm<FamilyType>(loadRegImm)) {
1477+
loadRegImmsFound++;
1478+
}
1479+
1480+
loadRegImm = find<MI_LOAD_REGISTER_IMM *>(++loadRegImm, cmdList.end());
1481+
}
1482+
}
1483+
14551484
DebugManagerStateRestore restore;
14561485

14571486
const size_t off[3] = {0, 0, 0};
@@ -1463,6 +1492,7 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14631492
uint32_t semaphoreAfterWalkerFound = 0;
14641493
uint32_t pipeControlBeforeWalkerFound = 0;
14651494
uint32_t pipeControlAfterWalkerFound = 0;
1495+
uint32_t loadRegImmsFound = 0;
14661496
};
14671497

14681498
HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
@@ -1612,3 +1642,64 @@ HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontI
16121642

16131643
pCmdQ->setIsSpecialCommandQueue(false);
16141644
}
1645+
1646+
// Verifies that the scratch register write shows up only for the enqueue
// selected by GpuScratchRegWriteAfterWalker: nothing matching after the first
// enqueue, exactly one matching MI_LOAD_REGISTER_IMM after the second.
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchWalkersThenInsertLoadRegisterImmCommandAroundSpecifiedEnqueue) {
    DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
    DebugManager.flags.GpuScratchRegWriteRegisterData.set(0x1234);
    DebugManager.flags.GpuScratchRegWriteRegisterOffset.set(0x5678);

    MockKernelWithInternals mockKernel(*pClDevice);

    // First enqueue: the flag value does not match yet, so no write is expected.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;

    // Parses what the queue has produced so far and returns the running total
    // of matching MI_LOAD_REGISTER_IMM commands (the counter is cumulative).
    auto parseAndCountLoadRegImms = [&]() {
        hwParser.parseCommands<FamilyType>(*pCmdQ);
        findLoadRegImms<FamilyType>(hwParser.cmdList);
        return loadRegImmsFound;
    };

    EXPECT_EQ(0u, parseAndCountLoadRegImms());

    // Second enqueue: this is the one the flag points at.
    pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);

    EXPECT_EQ(1u, parseAndCountLoadRegImms());
}
1670+
1671+
// Verifies that, across several enqueues, the scratch register write is
// programmed exactly once (only for the enqueue matching the flag value).
// NOTE(review): the test name contains a typo ("DispatcMultiplehWalkers");
// it is kept as-is here so the test identifier does not change.
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkersThenInsertLoadRegisterImmCommandOnlyOnce) {
    DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
    DebugManager.flags.GpuScratchRegWriteRegisterData.set(0x1234);
    DebugManager.flags.GpuScratchRegWriteRegisterOffset.set(0x5678);

    MockKernelWithInternals mockKernel(*pClDevice);

    constexpr uint32_t enqueueCount = 4u;
    for (uint32_t enqueueId = 0u; enqueueId < enqueueCount; enqueueId++) {
        pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr);
    }

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(*pCmdQ);
    findLoadRegImms<FamilyType>(hwParser.cmdList);

    EXPECT_EQ(1u, loadRegImmsFound);
}
1691+
1692+
// Verifies that enabling GpuScratchRegWriteAfterWalker grows the estimated
// command stream size by exactly one MI_LOAD_REGISTER_IMM.
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStreamSizeThenMiLoadRegisterImmCommandSizeIsIncluded) {
    MockKernelWithInternals mockKernel(*pClDevice);
    DispatchInfo dispatchInfo;
    MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel);
    dispatchInfo.setKernel(mockKernel.mockKernel);
    multiDispatchInfo.push(dispatchInfo);

    // Same query is issued before and after flipping the debug flag.
    auto estimateCommandStreamSize = [&]() {
        return EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo);
    };

    const auto sizeWithoutScratchWrite = estimateCommandStreamSize();

    DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
    const auto sizeWithScratchWrite = estimateCommandStreamSize();

    EXPECT_EQ(sizeWithoutScratchWrite + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), sizeWithScratchWrite);
}

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,6 @@ OverrideProfilingTimerResolution = -1
216216
PreferCopyEngineForCopyBufferToBuffer = -1
217217
EnableStaticPartitioning = -1
218218
DisableDeepBind = 0
219+
GpuScratchRegWriteAfterWalker = -1
220+
GpuScratchRegWriteRegisterData = 0
221+
GpuScratchRegWriteRegisterOffset = 0

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableMockSourceLevelDebugger, 0, "Switches driv
8686
DECLARE_DEBUG_VARIABLE(int32_t, ForceBtpPrefetchMode, -1, "-1: default, 0: disable, 1: enable, Enables Btp prefetching")
8787
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPointerImport, -1, "-1: default - disabled, 0: disabled, 1: enabled, Experimental implementation to import Host Pointer into L0")
8888
DECLARE_DEBUG_VARIABLE(int32_t, OverrideProfilingTimerResolution, -1, "-1: default - disabled, 0<=: Override deviceInfo.profilingTimerResolution")
89+
DECLARE_DEBUG_VARIABLE(int32_t, GpuScratchRegWriteAfterWalker, -1, "-1: disabled, x: add GPU scratch register write after x walker")
90+
DECLARE_DEBUG_VARIABLE(int32_t, GpuScratchRegWriteRegisterOffset, 0, "register offset for GPU scratch register write after walker")
91+
DECLARE_DEBUG_VARIABLE(int32_t, GpuScratchRegWriteRegisterData, 0, "register data for GPU scratch register write after walker")
8992

9093
/*LOGGING FLAGS*/
9194
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

shared/source/helpers/pause_on_gpu_properties.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -46,5 +46,14 @@ inline bool pauseModeAllowed(int32_t debugFlagValue, uint32_t taskCount, PauseMo
4646

4747
return (debugFlagValue == static_cast<int32_t>(taskCount));
4848
}
49+
50+
inline bool GpuScratchRegWriteAllowed(int32_t debugFlagValue, uint32_t taskCount) {
51+
if (!featureEnabled(debugFlagValue)) {
52+
// feature disabled
53+
return false;
54+
}
55+
56+
return (debugFlagValue == static_cast<int32_t>(taskCount));
57+
}
4958
} // namespace PauseOnGpuProperties
5059
} // namespace NEO

0 commit comments

Comments
 (0)