Skip to content

Commit bb138e8

Browse files
Fix to optimize SBA dispatch during submissions.
Move the SBA dirty flag inside the CSR so it is shared across command queues.

Related-To: LOCI-1982
Signed-off-by: Vinod Tipparaju <[email protected]>
1 parent b2b0d39 commit bb138e8

File tree

6 files changed

+42
-8
lines changed

6 files changed

+42
-8
lines changed

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
186186
gsbaStateDirty, frontEndStateDirty,
187187
perThreadScratchSpaceSize);
188188

189-
gsbaStateDirty |= !gsbaInit;
189+
gsbaStateDirty |= csr->getGSBAStateDirty();
190190
frontEndStateDirty |= csr->getMediaVFEStateDirty();
191191
if (!isCopyOnlyCommandQueue) {
192192

level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
7676
false,
7777
1u);
7878
*pSbaCmd = sbaCmd;
79-
gsbaInit = true;
79+
csr->setGSBAStateDirty(false);
8080

8181
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
8282

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ struct CommandQueueImp : public CommandQueue {
9393
NEO::LinearStream *commandStream = nullptr;
9494
std::atomic<uint32_t> taskCount{0};
9595
std::vector<Kernel *> printfFunctionContainer;
96-
bool gsbaInit = false;
9796
bool gpgpuEnabled = false;
9897
CommandBufferManager buffers;
9998
NEO::ResidencyContainer residencyContainer;

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
810810
alignedFree(alloc);
811811
}
812812

813-
HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSDirtyFlagIsSetOnlyOnce, CommandQueueExecuteTestSupport) {
813+
HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSDirtyFlagAndGSBADirtyFlagAreSetOnlyOnce, CommandQueueExecuteTestSupport) {
814814
ze_command_queue_desc_t desc = {};
815815
NEO::CommandStreamReceiver *csr;
816816
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -832,10 +832,13 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
832832
auto commandListHandle1 = commandList1->toHandle();
833833

834834
EXPECT_EQ(true, csr->getMediaVFEStateDirty());
835+
EXPECT_EQ(true, csr->getGSBAStateDirty());
835836
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
836837
EXPECT_EQ(false, csr->getMediaVFEStateDirty());
838+
EXPECT_EQ(false, csr->getGSBAStateDirty());
837839
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
838840
EXPECT_EQ(false, csr->getMediaVFEStateDirty());
841+
EXPECT_EQ(false, csr->getGSBAStateDirty());
839842

840843
commandQueue->destroy();
841844
commandList0->destroy();
@@ -845,6 +848,7 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
845848
using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
846849
HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSIsProgrammedOnlyOnce, CommandQueueExecuteSupport) {
847850
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
851+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
848852
ze_command_queue_desc_t desc = {};
849853
NEO::CommandStreamReceiver *csr;
850854
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -875,14 +879,17 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
875879
cmdList1, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
876880

877881
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
882+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
878883
// We should have only 1 state added
879884
ASSERT_EQ(1u, mediaVfeStates.size());
885+
ASSERT_EQ(1u, GSBAStates.size());
880886

881887
commandQueue->destroy();
882888
}
883889

884-
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsWithPTSSsetForFirstCmdListThenMVSIsProgrammedOnlyOnce, CommandQueueExecuteSupport) {
890+
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsWithPTSSsetForFirstCmdListThenMVSAndGSBAAreProgrammedOnlyOnce, CommandQueueExecuteSupport) {
885891
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
892+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
886893
ze_command_queue_desc_t desc = {};
887894
NEO::CommandStreamReceiver *csr;
888895
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -913,8 +920,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
913920
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
914921

915922
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
923+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
916924
// We should have only 1 state added
917925
ASSERT_EQ(1u, mediaVfeStates.size());
926+
ASSERT_EQ(1u, GSBAStates.size());
918927

919928
commandList0->reset();
920929
commandList0->setCommandListPerThreadScratchSize(0u);
@@ -941,15 +950,18 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
941950
cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter));
942951

943952
mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
953+
GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
944954
// We should have no state added
945955
ASSERT_EQ(0u, mediaVfeStates.size());
956+
ASSERT_EQ(0u, GSBAStates.size());
946957

947958
commandQueue->destroy();
948959
commandQueue1->destroy();
949960
}
950961

951-
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSsetForSecondCmdListThenMVSIsProgrammedTwice, CommandQueueExecuteSupport) {
962+
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSsetForSecondCmdListThenMVSandGSBAAreProgrammedTwice, CommandQueueExecuteSupport) {
952963
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
964+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
953965
ze_command_queue_desc_t desc = {};
954966
NEO::CommandStreamReceiver *csr;
955967
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -980,8 +992,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
980992
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
981993

982994
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
995+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
983996
// We should have 2 states added
984997
ASSERT_EQ(2u, mediaVfeStates.size());
998+
ASSERT_EQ(2u, GSBAStates.size());
985999

9861000
commandList0->reset();
9871001
commandList0->setCommandListPerThreadScratchSize(512u);
@@ -1008,15 +1022,18 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
10081022
cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter));
10091023

10101024
mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
1025+
GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
10111026
// We should have no state added
10121027
ASSERT_EQ(0u, mediaVfeStates.size());
1028+
ASSERT_EQ(0u, GSBAStates.size());
10131029

10141030
commandQueue->destroy();
10151031
commandQueue1->destroy();
10161032
}
10171033

1018-
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSGrowingThenMVSIsProgrammedTwice, CommandQueueExecuteSupport) {
1034+
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSGrowingThenMVSAndGSBAAreProgrammedTwice, CommandQueueExecuteSupport) {
10191035
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
1036+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
10201037
ze_command_queue_desc_t desc = {};
10211038
NEO::CommandStreamReceiver *csr;
10221039
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -1047,8 +1064,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
10471064
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
10481065

10491066
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
1067+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
10501068
// We should have only 1 state added
10511069
ASSERT_EQ(1u, mediaVfeStates.size());
1070+
ASSERT_EQ(1u, GSBAStates.size());
10521071

10531072
commandList0->reset();
10541073
commandList0->setCommandListPerThreadScratchSize(1024u);
@@ -1075,15 +1094,18 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
10751094
cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter));
10761095

10771096
mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
1097+
GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
10781098
// We should have only 1 state added
10791099
ASSERT_EQ(1u, mediaVfeStates.size());
1100+
ASSERT_EQ(1u, GSBAStates.size());
10801101

10811102
commandQueue->destroy();
10821103
commandQueue1->destroy();
10831104
}
10841105

1085-
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSUniquePerCmdListThenMVSIsProgrammedOncePerSubmission, CommandQueueExecuteSupport) {
1106+
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSUniquePerCmdListThenMVSAndGSBAAreProgrammedOncePerSubmission, CommandQueueExecuteSupport) {
10861107
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
1108+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
10871109
ze_command_queue_desc_t desc = {};
10881110
NEO::CommandStreamReceiver *csr;
10891111
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
@@ -1114,8 +1136,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
11141136
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));
11151137

11161138
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
1139+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
11171140
// We should have 2 states added
11181141
ASSERT_EQ(2u, mediaVfeStates.size());
1142+
ASSERT_EQ(2u, GSBAStates.size());
11191143

11201144
commandList0->reset();
11211145
commandList0->setCommandListPerThreadScratchSize(1024u);
@@ -1141,8 +1165,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
11411165
cmdList1, ptrOffset(commandQueue1->commandStream->getCpuBase(), 0), usedSpaceAfter));
11421166

11431167
mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
1168+
GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
11441169
// We should have 2 states added
11451170
ASSERT_EQ(2u, mediaVfeStates.size());
1171+
ASSERT_EQ(2u, GSBAStates.size());
11461172

11471173
commandQueue->destroy();
11481174
commandQueue1->destroy();

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueuecommandlist.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenExecutingCommandListsThenEndingPip
198198
using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
199199
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS, CommandQueueExecuteSupport) {
200200
using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
201+
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
201202
using PARSE = typename FamilyType::PARSE;
202203
ze_command_queue_desc_t desc = {};
203204
ze_result_t returnValue;
@@ -228,8 +229,10 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
228229
usedSpaceAfter));
229230

230231
auto mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList.begin(), cmdList.end());
232+
auto GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
231233
// We should have only 1 state added
232234
ASSERT_EQ(1u, mediaVfeStates.size());
235+
ASSERT_EQ(1u, GSBAStates.size());
233236

234237
CommandList::fromHandle(commandLists[0])->reset();
235238
CommandList::fromHandle(commandLists[1])->reset();
@@ -252,8 +255,10 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
252255
usedSpaceAfter));
253256

254257
mediaVfeStates = findAll<MEDIA_VFE_STATE *>(cmdList1.begin(), cmdList1.end());
258+
GSBAStates = findAll<STATE_BASE_ADDRESS *>(cmdList1.begin(), cmdList1.end());
255259
// We should have 2 states added
256260
ASSERT_EQ(2u, mediaVfeStates.size());
261+
ASSERT_EQ(2u, GSBAStates.size());
257262

258263
commandQueue->destroy();
259264
}

shared/source/command_stream/command_stream_receiver.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ class CommandStreamReceiver {
133133
// Stores the media VFE state dirty flag on this command stream receiver.
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
134134
// Returns the stored media VFE state dirty flag (the member is initialized to true).
// CommandQueueHw::executeCommandLists() ORs this value into its local frontEndStateDirty.
bool getMediaVFEStateDirty() { return mediaVfeStateDirty; }
135135

136+
// Stores the GSBA state dirty flag on this command stream receiver, replacing the
// per-queue gsbaInit member removed from CommandQueueImp in this change.
// CommandQueueHw::programStateBaseAddress() passes false here after writing the SBA command.
void setGSBAStateDirty(bool dirty) { GSBAStateDirty = dirty; }
137+
// Returns the stored GSBA state dirty flag (the member is initialized to true).
// CommandQueueHw::executeCommandLists() ORs this value into its local gsbaStateDirty.
bool getGSBAStateDirty() { return GSBAStateDirty; }
138+
136139
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
137140
GraphicsAllocation *getScratchAllocation();
138141
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; }
@@ -309,6 +312,7 @@ class CommandStreamReceiver {
309312
bool isEnginePrologueSent = false;
310313
bool isPerDssBackedBufferSent = false;
311314
bool GSBAFor32BitProgrammed = false;
315+
bool GSBAStateDirty = true;
312316
bool bindingTableBaseAddressRequired = false;
313317
bool mediaVfeStateDirty = true;
314318
bool lastVmeSubslicesConfig = false;

0 commit comments

Comments
 (0)