@@ -1418,6 +1418,21 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14181418 return false ;
14191419 }
14201420
1421+ template <typename FamilyType>
1422+ bool verifyLoadRegImm (const GenCmdList::iterator &iterator) {
1423+ using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
1424+ uint32_t expectedRegisterOffset = DebugManager.flags .GpuScratchRegWriteRegisterOffset .get ();
1425+ uint32_t expectedRegisterData = DebugManager.flags .GpuScratchRegWriteRegisterData .get ();
1426+ auto loadRegImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*iterator);
1427+
1428+ if ((expectedRegisterOffset == loadRegImm->getRegisterOffset ()) &&
1429+ (expectedRegisterData == loadRegImm->getDataDword ())) {
1430+ return true ;
1431+ }
1432+
1433+ return false ;
1434+ }
1435+
14211436 template <typename MI_SEMAPHORE_WAIT>
14221437 void findSemaphores (GenCmdList &cmdList) {
14231438 auto semaphore = find<MI_SEMAPHORE_WAIT *>(cmdList.begin (), cmdList.end ());
@@ -1452,6 +1467,20 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14521467 }
14531468 }
14541469
1470+ template <typename FamilyType>
1471+ void findLoadRegImms (GenCmdList &cmdList) {
1472+ using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
1473+ auto loadRegImm = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin (), cmdList.end ());
1474+
1475+ while (loadRegImm != cmdList.end ()) {
1476+ if (verifyLoadRegImm<FamilyType>(loadRegImm)) {
1477+ loadRegImmsFound++;
1478+ }
1479+
1480+ loadRegImm = find<MI_LOAD_REGISTER_IMM *>(++loadRegImm, cmdList.end ());
1481+ }
1482+ }
1483+
14551484 DebugManagerStateRestore restore;
14561485
14571486 const size_t off[3 ] = {0 , 0 , 0 };
@@ -1463,6 +1492,7 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
14631492 uint32_t semaphoreAfterWalkerFound = 0 ;
14641493 uint32_t pipeControlBeforeWalkerFound = 0 ;
14651494 uint32_t pipeControlAfterWalkerFound = 0 ;
1495+ uint32_t loadRegImmsFound = 0 ;
14661496};
14671497
14681498HWTEST_F (PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
@@ -1612,3 +1642,64 @@ HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenDontI
16121642
16131643 pCmdQ->setIsSpecialCommandQueue (false );
16141644}
1645+
1646+ HWTEST_F (PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchWalkersThenInsertLoadRegisterImmCommandAroundSpecifiedEnqueue) {
1647+ DebugManager.flags .GpuScratchRegWriteAfterWalker .set (1 );
1648+ DebugManager.flags .GpuScratchRegWriteRegisterData .set (0x1234 );
1649+ DebugManager.flags .GpuScratchRegWriteRegisterOffset .set (0x5678 );
1650+
1651+ MockKernelWithInternals mockKernel (*pClDevice);
1652+
1653+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1654+
1655+ HardwareParse hwParser;
1656+
1657+ hwParser.parseCommands <FamilyType>(*pCmdQ);
1658+
1659+ findLoadRegImms<FamilyType>(hwParser.cmdList );
1660+
1661+ EXPECT_EQ (0u , loadRegImmsFound);
1662+
1663+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1664+ hwParser.parseCommands <FamilyType>(*pCmdQ);
1665+
1666+ findLoadRegImms<FamilyType>(hwParser.cmdList );
1667+
1668+ EXPECT_EQ (1u , loadRegImmsFound);
1669+ }
1670+
1671+ HWTEST_F (PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkersThenInsertLoadRegisterImmCommandOnlyOnce) {
1672+ DebugManager.flags .GpuScratchRegWriteAfterWalker .set (1 );
1673+ DebugManager.flags .GpuScratchRegWriteRegisterData .set (0x1234 );
1674+ DebugManager.flags .GpuScratchRegWriteRegisterOffset .set (0x5678 );
1675+
1676+ MockKernelWithInternals mockKernel (*pClDevice);
1677+
1678+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1679+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1680+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1681+ pCmdQ->enqueueKernel (mockKernel.mockKernel , 1 , off, gws, nullptr , 0 , nullptr , nullptr );
1682+
1683+ HardwareParse hwParser;
1684+
1685+ hwParser.parseCommands <FamilyType>(*pCmdQ);
1686+
1687+ findLoadRegImms<FamilyType>(hwParser.cmdList );
1688+
1689+ EXPECT_EQ (1u , loadRegImmsFound);
1690+ }
1691+
1692+ HWTEST_F (PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStreamSizeThenMiLoadRegisterImmCommandSizeIsIncluded) {
1693+ MockKernelWithInternals mockKernel (*pClDevice);
1694+ DispatchInfo dispatchInfo;
1695+ MultiDispatchInfo multiDispatchInfo (mockKernel.mockKernel );
1696+ dispatchInfo.setKernel (mockKernel.mockKernel );
1697+ multiDispatchInfo.push (dispatchInfo);
1698+
1699+ auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS (CL_COMMAND_NDRANGE_KERNEL, {}, false , false , false , *pCmdQ, multiDispatchInfo);
1700+ DebugManager.flags .GpuScratchRegWriteAfterWalker .set (1 );
1701+
1702+ auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS (CL_COMMAND_NDRANGE_KERNEL, {}, false , false , false , *pCmdQ, multiDispatchInfo);
1703+
1704+ EXPECT_EQ (baseCommandStreamSize + sizeof (typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
1705+ }
0 commit comments