Skip to content

Commit d2eb296

Browse files
performance: Refactor cmd buffer reuse for cmd lists
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent ec40fdc commit d2eb296

File tree

3 files changed: 70 additions and 26 deletions.

shared/source/command_container/cmdcontainer.cpp

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -296,24 +296,29 @@ void CommandContainer::handleCmdBufferAllocations(size_t startIndex) {
296296
}
297297
for (size_t i = startIndex; i < cmdBufferAllocations.size(); i++) {
298298
if (this->reusableAllocationList) {
299-
300-
if (isHandleFenceCompletionRequired) {
301-
std::vector<std::unique_lock<CommandStreamReceiver::MutexType>> locks;
302-
for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) {
303-
if (cmdBufferAllocations[i]->isUsedByOsContext(engine.osContext->getContextId())) {
304-
locks.push_back(engine.commandStreamReceiver->obtainUniqueOwnership());
305-
engine.commandStreamReceiver->stopDirectSubmission(false);
306-
}
307-
}
308-
if (!locks.empty()) {
309-
this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]);
299+
bool allocationHandled = false;
300+
for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) {
301+
auto osContextId = engine.osContext->getContextId();
302+
if (cmdBufferAllocations[i]->isUsedByOsContext(osContextId) && engine.commandStreamReceiver->isAnyDirectSubmissionEnabled()) {
303+
auto lock = engine.commandStreamReceiver->obtainUniqueOwnership();
304+
auto taskCount = engine.commandStreamReceiver->peekTaskCount() + 1;
305+
cmdBufferAllocations[i]->updateTaskCount(taskCount, osContextId);
306+
cmdBufferAllocations[i]->updateResidencyTaskCount(taskCount, osContextId);
307+
engine.commandStreamReceiver->flushTagUpdate();
308+
engine.commandStreamReceiver->waitForTaskCount(taskCount);
309+
allocationHandled = true;
310310
}
311311
}
312+
if (!allocationHandled && isHandleFenceCompletionRequired) {
313+
this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]);
314+
}
312315

313316
for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) {
314-
cmdBufferAllocations[i]->releaseUsageInOsContext(engine.osContext->getContextId());
317+
auto osContextId = engine.osContext->getContextId();
318+
cmdBufferAllocations[i]->releaseUsageInOsContext(osContextId);
315319
}
316-
reusableAllocationList->pushFrontOne(*cmdBufferAllocations[i]);
320+
321+
reusableAllocationList->pushTailOne(*cmdBufferAllocations[i]);
317322
} else {
318323
this->device->getMemoryManager()->freeGraphicsMemory(cmdBufferAllocations[i]);
319324
}
@@ -328,7 +333,7 @@ GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation(bool for
328333
forceHostMemory &= this->useSecondaryCommandStream;
329334
GraphicsAllocation *cmdBufferAllocation = nullptr;
330335
if (this->reusableAllocationList) {
331-
size_t alignedSize = getAlignedCmdBufferSize();
336+
const size_t alignedSize = getAlignedCmdBufferSize();
332337
cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, nullptr, AllocationType::commandBuffer).release();
333338
}
334339
if (!cmdBufferAllocation) {

shared/test/common/libult/ult_command_stream_receiver.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018-2023 Intel Corporation
2+
* Copyright (C) 2018-2024 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -336,7 +336,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
336336
}
337337
SubmissionStatus flushTagUpdate() override {
338338
flushTagUpdateCalled = true;
339-
return CommandStreamReceiverHw<GfxFamily>::flushTagUpdate();
339+
auto ret = SubmissionStatus::success;
340+
if (this->callFlushTagUpdate) {
341+
ret = CommandStreamReceiverHw<GfxFamily>::flushTagUpdate();
342+
}
343+
return ret;
340344
}
341345

342346
void initProgrammingFlags() override {
@@ -505,6 +509,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
505509
std::atomic_bool downloadAllocationsCalled = false;
506510
bool flushBatchedSubmissionsCalled = false;
507511
bool flushTagUpdateCalled = false;
512+
bool callFlushTagUpdate = true;
508513
bool initProgrammingFlagsCalled = false;
509514
bool multiOsContextCapable = false;
510515
bool memoryCompressionEnabled = false;

shared/test/unit_test/command_container/command_container_tests.cpp

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,9 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen
309309
cmdContainer->initialize(pDevice, &allocList, true, HeapSize::defaultHeapSize, false);
310310
auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations();
311311
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
312+
auto csr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(memoryManager->getRegisteredEngines(0u)[0].commandStreamReceiver);
313+
csr->directSubmissionAvailable = true;
314+
csr->callFlushTagUpdate = false;
312315
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 0u);
313316
EXPECT_EQ(cmdBufferAllocs.size(), 1u);
314317
EXPECT_TRUE(allocList.peekIsEmpty());
@@ -318,10 +321,9 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen
318321

319322
auto cmdBuffer0 = cmdBufferAllocs[0];
320323
auto cmdBuffer1 = cmdBufferAllocs[1];
321-
auto csr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(memoryManager->getRegisteredEngines(0u)[0].commandStreamReceiver);
322324

323325
cmdContainer->reset();
324-
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 0u);
326+
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 1u);
325327
EXPECT_EQ(cmdBufferAllocs.size(), 1u);
326328
EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0);
327329
EXPECT_FALSE(allocList.peekIsEmpty());
@@ -330,32 +332,31 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen
330332

331333
cmdContainer->allocateNextCommandBuffer();
332334
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
333-
cmdBuffer1->updateTaskCount(1u, 0u);
334335
cmdContainer->reset();
335-
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 1u);
336+
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 2u);
336337
EXPECT_EQ(cmdBufferAllocs.size(), 1u);
337338
EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0);
338339
EXPECT_FALSE(allocList.peekIsEmpty());
339-
EXPECT_TRUE(csr->stopDirectSubmissionCalled);
340+
EXPECT_FALSE(csr->stopDirectSubmissionCalled);
340341
EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking);
341342

342343
cmdContainer->allocateNextCommandBuffer();
343344
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
344345
EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0);
345346
EXPECT_EQ(cmdBufferAllocs[1], cmdBuffer1);
346347
EXPECT_TRUE(allocList.peekIsEmpty());
347-
EXPECT_TRUE(csr->stopDirectSubmissionCalled);
348+
EXPECT_FALSE(csr->stopDirectSubmissionCalled);
348349
EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking);
349350
cmdBuffer1->updateTaskCount(1u, 0u);
350351

351352
cmdContainer.reset();
352353

353-
EXPECT_TRUE(csr->stopDirectSubmissionCalled);
354+
EXPECT_FALSE(csr->stopDirectSubmissionCalled);
354355
EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking);
355356
csr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(memoryManager->getRegisteredEngines(0u)[1].commandStreamReceiver);
356357
EXPECT_FALSE(csr->stopDirectSubmissionCalled);
357358
EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking);
358-
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 2u);
359+
EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 3u);
359360
EXPECT_FALSE(allocList.peekIsEmpty());
360361
cmdBuffer1->releaseUsageInOsContext(0u);
361362
allocList.freeAllGraphicsAllocations(pDevice);
@@ -375,22 +376,55 @@ TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlist
375376
cmdContainer->allocateNextCommandBuffer();
376377
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
377378

379+
cmdContainer->reset();
380+
EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled);
381+
cmdContainer->allocateNextCommandBuffer();
382+
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
383+
384+
cmdBufferAllocs[1]->updateTaskCount(2u, 0u);
385+
cmdContainer->reset();
386+
EXPECT_EQ(2u, memoryManager->handleFenceCompletionCalled);
387+
cmdContainer->allocateNextCommandBuffer();
388+
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
389+
EXPECT_FALSE(cmdBufferAllocs[1]->isUsedByOsContext(0u));
390+
391+
cmdBufferAllocs[0]->updateTaskCount(5u, 0u);
392+
cmdBufferAllocs[1]->updateTaskCount(5u, 0u);
393+
cmdContainer.reset();
394+
EXPECT_EQ(4u, memoryManager->handleFenceCompletionCalled);
395+
allocList.freeAllGraphicsAllocations(pDevice);
396+
}
397+
398+
TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlistResetAndDestroyFlagSetWhenAllocateAndResetThenHandleFenceCompletionIsNotCalled) {
399+
DebugManagerStateRestore restore;
400+
debugManager.flags.RemoveUserFenceInCmdlistResetAndDestroy.set(1);
401+
402+
AllocationsList allocList;
403+
auto cmdContainer = std::make_unique<CommandContainer>();
404+
cmdContainer->initialize(pDevice, &allocList, HeapSize::defaultHeapSize, true, false);
405+
auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations();
406+
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
407+
EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
408+
EXPECT_EQ(cmdBufferAllocs.size(), 1u);
409+
cmdContainer->allocateNextCommandBuffer();
410+
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
411+
378412
cmdContainer->reset();
379413
EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
380414
cmdContainer->allocateNextCommandBuffer();
381415
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
382416

383417
cmdBufferAllocs[1]->updateTaskCount(2u, 0u);
384418
cmdContainer->reset();
385-
EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled);
419+
EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
386420
cmdContainer->allocateNextCommandBuffer();
387421
EXPECT_EQ(cmdBufferAllocs.size(), 2u);
388422
EXPECT_FALSE(cmdBufferAllocs[1]->isUsedByOsContext(0u));
389423

390424
cmdBufferAllocs[0]->updateTaskCount(5u, 0u);
391425
cmdBufferAllocs[1]->updateTaskCount(5u, 0u);
392426
cmdContainer.reset();
393-
EXPECT_EQ(3u, memoryManager->handleFenceCompletionCalled);
427+
EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled);
394428
allocList.freeAllGraphicsAllocations(pDevice);
395429
}
396430

0 commit comments

Comments
 (0)