Skip to content

Commit 32ae955

Browse files
Flush small task adjustments
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent f1b6b73 commit 32ae955

15 files changed

+215
-32
lines changed

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
176176
NEO::PipeControlArgs args;
177177
this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false);
178178
if (this->isSyncModeQueue) {
179-
this->csr->flushTagUpdate();
180179
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
181180
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
182181
}
@@ -288,7 +287,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
288287
}
289288
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false);
290289
if (this->isSyncModeQueue) {
291-
this->csr->flushTagUpdate();
292290
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
293291
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
294292
}
@@ -322,7 +320,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
322320
}
323321
this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false);
324322
if (this->isSyncModeQueue) {
325-
this->csr->flushTagUpdate();
326323
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
327324
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
328325
}

level_zero/core/source/cmdlist/cmdlist_imp.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};
2828

2929
ze_result_t CommandListImp::destroy() {
3030
if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
31-
this->csr->flushTagUpdate();
3231
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
3332
this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount());
3433
}

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -558,6 +558,10 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
558558

559559
UNRECOVERABLE_IF(csr == nullptr);
560560

561+
if (csr->isUpdateTagFromWaitEnabled()) {
562+
return;
563+
}
564+
561565
auto taskCountToWrite = csr->peekTaskCount() + 1;
562566
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
563567

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,34 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
243243
commandQueue->destroy();
244244
}
245245

246+
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWriteThenNoPipeControlFlushed) {
247+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
248+
249+
DebugManagerStateRestore restorer;
250+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
251+
252+
const ze_command_queue_desc_t desc = {};
253+
ze_result_t returnValue;
254+
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
255+
device,
256+
neoDevice->getDefaultEngine().commandStreamReceiver,
257+
&desc,
258+
false,
259+
false,
260+
returnValue));
261+
262+
commandQueue->dispatchTaskCountWrite(*commandQueue->commandStream, false);
263+
264+
GenCmdList cmdList;
265+
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
266+
cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), commandQueue->commandStream->getUsed()));
267+
268+
auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
269+
EXPECT_EQ(cmdList.end(), itor);
270+
271+
commandQueue->destroy();
272+
}
273+
246274
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
247275
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
248276
csr->setupContext(*neoDevice->getDefaultEngine().osContext);

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa
125125

126126
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) {
127127
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
128+
DebugManagerStateRestore restorer;
129+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
128130

129131
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
130132

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -530,6 +530,38 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
530530
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
531531
}
532532

533+
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) {
534+
DebugManagerStateRestore restorer;
535+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
536+
537+
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
538+
pDevice->resetCommandStreamReceiver(mockCsr);
539+
mockCsr->useNewResourceImplicitFlush = false;
540+
mockCsr->useGpuIdleImplicitFlush = false;
541+
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
542+
543+
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
544+
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
545+
dispatchFlags.guardCommandBufferWithPipeControl = true;
546+
547+
mockCsr->flushTask(commandStream,
548+
0,
549+
dsh,
550+
ioh,
551+
ssh,
552+
taskLevel,
553+
dispatchFlags,
554+
*pDevice);
555+
556+
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
557+
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
558+
559+
mockCsr->flushBatchedSubmissions();
560+
561+
EXPECT_EQ(1u, mockCsr->peekLatestSentTaskCount());
562+
EXPECT_EQ(0u, mockCsr->peekLatestFlushedTaskCount());
563+
}
564+
533565
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTaskIsCalledThenFlushedTaskCountIsModifed) {
534566
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
535567
auto &commandStream = commandQueue.getCS(4096u);
@@ -1024,12 +1056,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
10241056
DebugManager.flags.UpdateTaskCountFromWait.set(1);
10251057

10261058
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
1059+
commandQueue.taskCount = 10;
10271060

10281061
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
10291062
pDevice->resetCommandStreamReceiver(mockCsr);
10301063
mockCsr->useNewResourceImplicitFlush = false;
10311064
mockCsr->useGpuIdleImplicitFlush = false;
10321065
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
1066+
mockCsr->taskCount.store(10);
1067+
mockCsr->latestFlushedTaskCount.store(5);
10331068

10341069
commandQueue.waitForAllEngines(false, nullptr);
10351070

@@ -1052,12 +1087,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
10521087
};
10531088

10541089
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
1090+
commandQueue.taskCount = 10;
10551091

10561092
auto mockCsr = new MockCsrHwDirectSubmission(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
10571093
pDevice->resetCommandStreamReceiver(mockCsr);
10581094
mockCsr->useNewResourceImplicitFlush = false;
10591095
mockCsr->useGpuIdleImplicitFlush = false;
10601096
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
1097+
mockCsr->taskCount.store(10);
1098+
mockCsr->latestFlushedTaskCount.store(5);
10611099

10621100
commandQueue.waitForAllEngines(false, nullptr);
10631101

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -917,6 +917,27 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
917917
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, true);
918918
}
919919

920+
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
921+
givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) {
922+
DebugManagerStateRestore restorer;
923+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
924+
925+
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
926+
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
927+
commandStreamReceiver.createPreemptionAllocation();
928+
}
929+
EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig);
930+
commandStreamReceiver.activePartitions = 2;
931+
commandStreamReceiver.taskCount = 3;
932+
EXPECT_TRUE(commandStreamReceiver.staticWorkPartitioningEnabled);
933+
flushTask(commandStreamReceiver, true);
934+
commandStreamReceiver.flushTagUpdate();
935+
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
936+
937+
prepareLinearStream<FamilyType>(commandStream, 0);
938+
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, true);
939+
}
940+
920941
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
921942
givenMultipleDynamicActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
922943
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
@@ -936,6 +957,29 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile
936957
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
937958
}
938959

960+
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
961+
givenMultipleDynamicActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
962+
DebugManagerStateRestore restorer;
963+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
964+
965+
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
966+
if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) {
967+
commandStreamReceiver.createPreemptionAllocation();
968+
}
969+
commandStreamReceiver.activePartitions = 2;
970+
commandStreamReceiver.taskCount = 3;
971+
commandStreamReceiver.staticWorkPartitioningEnabled = false;
972+
flushTask(commandStreamReceiver, true);
973+
commandStreamReceiver.flushTagUpdate();
974+
EXPECT_EQ(2u, commandStreamReceiver.activePartitionsConfig);
975+
976+
prepareLinearStream<FamilyType>(commandStream, 0);
977+
verifyPipeControl<FamilyType>(commandStreamReceiver, 4, false);
978+
979+
prepareLinearStream<FamilyType>(commandStreamReceiver.commandStream, 0);
980+
verifyActivePartitionConfig<FamilyType>(commandStreamReceiver, false);
981+
}
982+
939983
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests,
940984
givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) {
941985
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
#include "shared/source/command_stream/scratch_space_controller_base.h"
99
#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h"
10+
#include "shared/test/common/helpers/debug_manager_state_restore.h"
1011
#include "shared/test/common/helpers/engine_descriptor_helper.h"
1112
#include "shared/test/common/helpers/ult_hw_config.h"
1213
#include "shared/test/common/mocks/mock_allocation_properties.h"
@@ -348,6 +349,37 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) {
348349
EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount());
349350
}
350351

352+
HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) {
353+
DebugManagerStateRestore restorer;
354+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
355+
356+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
357+
358+
cl_int retVal = CL_SUCCESS;
359+
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
360+
361+
constexpr size_t hostAllocationSize = MemoryConstants::pageSize;
362+
auto hostAllocationPtr = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize);
363+
void *hostPtr = reinterpret_cast<void *>(hostAllocationPtr.get());
364+
365+
auto graphicsAllocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex());
366+
367+
auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::HostPtrToBuffer,
368+
csr, graphicsAllocation, nullptr, hostPtr,
369+
graphicsAllocation->getGpuAddress(), 0,
370+
0, 0, {1, 1, 1}, 0, 0, 0, 0);
371+
372+
BlitPropertiesContainer blitPropertiesContainer;
373+
blitPropertiesContainer.push_back(blitProperties);
374+
375+
auto taskCount = csr.peekTaskCount();
376+
377+
csr.blitBuffer(blitPropertiesContainer, false, false, *pDevice);
378+
379+
EXPECT_EQ(csr.peekTaskCount(), taskCount + 1);
380+
EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCount);
381+
}
382+
351383
HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstructedProperly) {
352384
auto bcsOsContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0,
353385
EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, pDevice->getDeviceBitfield())));

opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,30 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC
318318
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
319319
}
320320

321+
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
322+
DebugManagerStateRestore restorer;
323+
DebugManager.flags.UpdateTaskCountFromWait.set(1);
324+
325+
MockCsrHw<FamilyType> csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
326+
327+
GraphicsAllocation *commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
328+
ASSERT_NE(nullptr, commandBuffer);
329+
LinearStream cs(commandBuffer);
330+
331+
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
332+
ResidencyContainer residencyList;
333+
334+
auto previousTaskCount = csr.peekTaskCount();
335+
auto currentTaskCount = previousTaskCount + 1;
336+
csr.submitBatchBuffer(batchBuffer, residencyList);
337+
338+
EXPECT_EQ(currentTaskCount, csr.peekTaskCount());
339+
EXPECT_EQ(previousTaskCount, csr.peekLatestFlushedTaskCount());
340+
EXPECT_EQ(currentTaskCount, csr.peekLatestSentTaskCount());
341+
342+
memoryManager->freeGraphicsMemoryImpl(commandBuffer);
343+
}
344+
321345
HWTEST_F(CommandStreamReceiverTest, givenOverrideCsrAllocationSizeWhenCreatingCommandStreamCsrGraphicsAllocationThenAllocationHasCorrectSize) {
322346
DebugManagerStateRestore restore;
323347

opencl/test/unit_test/kernel/kernel_tests.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -472,6 +472,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
472472
void flushTagUpdate() override{};
473473
void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{};
474474
void updateTagFromWait() override{};
475+
bool isUpdateTagFromWaitEnabled() override { return false; };
475476

476477
bool isMultiOsContextCapable() const override { return false; }
477478

0 commit comments

Comments
 (0)