Skip to content

Commit 98d7768

Browse files
Add initial support for KernelArgsBuffer allocation
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent d3796b2 commit 98d7768

File tree

42 files changed

+147
-40
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+147
-40
lines changed

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -282,6 +282,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
282282
: NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(hwInfo);
283283
}
284284

285+
linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(csr->getKernelArgsBufferAllocation(), csr->getLogicalStateHelper());
286+
285287
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
286288
size_t padding = alignedSize - linearStreamSizeEstimate;
287289

@@ -370,6 +372,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
370372
csrHw->programActivePartitionConfig(child);
371373
}
372374

375+
NEO::EncodeKernelArgsBuffer<GfxFamily>::encodeKernelArgsBufferCmds(csr->getKernelArgsBufferAllocation(), csr->getLogicalStateHelper());
376+
377+
if (csr->getKernelArgsBufferAllocation()) {
378+
csr->makeResident(*csr->getKernelArgsBufferAllocation());
379+
}
380+
373381
if (csr->getLogicalStateHelper()) {
374382
if (frontEndStateDirty && !isCopyOnlyCommandQueue) {
375383
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);

level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
6868
false,
6969
NEO::MemoryCompressionState::NotApplicable,
7070
false,
71-
1u,
72-
nullptr);
71+
1u);
7372
*sbaCmdBuf = sbaCmd;
7473
csr->setGSBAStateDirty(false);
7574

level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateBaseAddress(uint64_t gsba, bool
5151
multiOsContextCapable,
5252
NEO::MemoryCompressionState::NotApplicable,
5353
false,
54-
1u,
55-
nullptr);
54+
1u);
5655
*sbaCmdBuf = sbaCmd;
5756

5857
auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,7 @@ using CommandQueueCommandsMultiTile = CommandQueueCommands<true>;
251251

252252
HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandListsThenHardwareContextIsProgrammedAndGlobalAllocationResident) {
253253
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
254+
csr.createKernelArgsBufferAllocation();
254255
csr.initializeTagAllocation();
255256
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
256257

@@ -277,6 +278,34 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
277278
commandQueue->destroy();
278279
}
279280

281+
// Checks that after executeCommandLists() on a command queue backed by a mock CSR,
// the CSR's kernel-args buffer allocation (when one exists) is present in the CSR's
// residency container, and that the execute call reports success.
HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandListsThenKernelArgBufferAllocationIsResident) {
282+
// Stand-alone mock CSR for root device index 0, using the device's bitfield.
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
283+
// Request the kernel-args buffer allocation up front; the result is queried again below.
csr.createKernelArgsBufferAllocation();
284+
csr.initializeTagAllocation();
285+
// Bind the CSR to the OS context of the device's default engine.
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
286+
287+
ze_result_t returnValue;
288+
// Create the queue on top of the mock CSR so residency can be inspected on `csr`.
// NOTE(review): the two boolean arguments are presumably isCopyOnly=true and
// isInternal=false -- confirm against the CommandQueue::create signature.
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
289+
device,
290+
&csr,
291+
&desc,
292+
true,
293+
false,
294+
returnValue);
295+
// Hard assert: the rest of the test dereferences commandQueue.
ASSERT_NE(nullptr, commandQueue);
296+
297+
// A single command list created for the Copy engine group is submitted.
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
298+
auto commandListHandle = commandList->toHandle();
299+
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
300+
301+
// The residency check is only performed when the CSR actually holds an allocation.
// NOTE(review): presumably the allocation can be null on configurations that do not
// use a kernel-args buffer -- confirm; in that case this test only verifies `status`.
auto kernelArgsBufferAllocation = csr.getKernelArgsBufferAllocation();
302+
if (kernelArgsBufferAllocation) {
303+
EXPECT_TRUE(isAllocationInResidencyContainer(csr, kernelArgsBufferAllocation));
304+
}
305+
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
306+
// The queue is released through destroy(); it is not owned by a smart pointer here.
commandQueue->destroy();
307+
}
308+
280309
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident, IsAtLeastXeHpCore) {
281310
DebugManagerStateRestore restorer;
282311
DebugManager.flags.EnableWalkerPartition.set(1);
@@ -296,6 +325,7 @@ HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecuti
296325
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
297326
EXPECT_EQ(2u, csr.activePartitions);
298327
csr.initializeTagAllocation();
328+
csr.createKernelArgsBufferAllocation();
299329
csr.createWorkPartitionAllocation(*neoDevice);
300330
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
301331

@@ -352,6 +382,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenCommandQueueWhenExecutingCommandL
352382

353383
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
354384
csr.initializeTagAllocation();
385+
csr.createKernelArgsBufferAllocation();
355386
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
356387
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
357388
csr.createPreemptionAllocation();
@@ -416,6 +447,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
416447

417448
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
418449
csr.initializeTagAllocation();
450+
csr.createKernelArgsBufferAllocation();
419451
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
420452
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
421453
csr.createPreemptionAllocation();
@@ -479,6 +511,7 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndir
479511

480512
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
481513
csr.initializeTagAllocation();
514+
csr.createKernelArgsBufferAllocation();
482515
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
483516
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
484517
csr.createPreemptionAllocation();

level_zero/core/test/unit_tests/sources/context/test_context.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -520,6 +520,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
520520
const ze_command_queue_desc_t desc = {};
521521
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
522522
csr.initializeTagAllocation();
523+
csr.createKernelArgsBufferAllocation();
523524
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
524525

525526
ze_result_t returnValue;
@@ -570,6 +571,7 @@ HWTEST2_F(ContextMakeMemoryResidentAndMigrationTests,
570571
const ze_command_queue_desc_t desc = {};
571572
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
572573
csr.initializeTagAllocation();
574+
csr.createKernelArgsBufferAllocation();
573575
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
574576

575577
ze_result_t returnValue;
@@ -618,6 +620,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
618620
const ze_command_queue_desc_t desc = {};
619621
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
620622
csr.initializeTagAllocation();
623+
csr.createKernelArgsBufferAllocation();
621624
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
622625

623626
ze_result_t returnValue;

opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -504,7 +504,7 @@ HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenTaskLevelIsIncremented) {
504504
}
505505

506506
HWTEST_F(EnqueueKernelTest, WhenEnqueingKernelThenCsrTaskLevelIsIncremented) {
507-
//this test case assumes IOQ
507+
// this test case assumes IOQ
508508
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
509509
csr.taskCount = pCmdQ->taskCount + 100;
510510
csr.taskLevel = pCmdQ->taskLevel + 50;
@@ -771,9 +771,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
771771

772772
auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
773773

774-
//Three more surfaces from preemptionAllocation, SipKernel and clearColorAllocation
774+
// Three more surfaces from preemptionAllocation, SipKernel and clearColorAllocation
775775
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
776776
csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1;
777+
csrSurfaceCount += mockCsr->getKernelArgsBufferAllocation() ? 1 : 0;
777778
size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0;
778779
size_t fenceSurfaceCount = mockCsr->globalFenceAllocation ? 1 : 0;
779780
size_t clearColorSize = mockCsr->clearColorAllocation ? 1 : 0;
@@ -926,7 +927,7 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenKernelIs
926927

927928
MockKernelWithInternals mockKernel(*pClDevice, context);
928929
size_t gws[3] = {1, 0, 0};
929-
//make sure csr emits something
930+
// make sure csr emits something
930931
mockCsrmockCsr.mediaVfeStateDirty = true;
931932
pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
932933
mockCsrmockCsr.mediaVfeStateDirty = true;

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -984,6 +984,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenFlu
984984
CommandStreamReceiverHwLog<FamilyType> commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
985985
commandStreamReceiver.setupContext(*pDevice->getDefaultEngine().osContext);
986986
commandStreamReceiver.initializeTagAllocation();
987+
commandStreamReceiver.createKernelArgsBufferAllocation();
987988
commandStreamReceiver.createPreemptionAllocation();
988989
commandStream.getSpace(sizeof(typename FamilyType::MI_NOOP));
989990

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1229,8 +1229,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrWhenGen
12291229
false,
12301230
MemoryCompressionState::NotApplicable,
12311231
false,
1232-
1u,
1233-
nullptr);
1232+
1u);
12341233

12351234
EXPECT_NE(generalStateBaseAddress, sbaCmd.getGeneralStateBaseAddress());
12361235
EXPECT_EQ(gmmHelper->decanonize(generalStateBaseAddress), sbaCmd.getGeneralStateBaseAddress());
@@ -1256,8 +1255,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroGeneralStateBaseAddres
12561255
false,
12571256
MemoryCompressionState::NotApplicable,
12581257
false,
1259-
1u,
1260-
nullptr);
1258+
1u);
12611259

12621260
EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress());
12631261
EXPECT_EQ(0u, sbaCmd.getGeneralStateBufferSize());
@@ -1285,8 +1283,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroInternalHeapBaseAddres
12851283
false,
12861284
MemoryCompressionState::NotApplicable,
12871285
false,
1288-
1u,
1289-
nullptr);
1286+
1u);
12901287

12911288
EXPECT_FALSE(sbaCmd.getInstructionBaseAddressModifyEnable());
12921289
EXPECT_EQ(0ull, sbaCmd.getInstructionBaseAddress());
@@ -1319,8 +1316,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgram
13191316
false,
13201317
MemoryCompressionState::NotApplicable,
13211318
false,
1322-
1u,
1323-
nullptr);
1319+
1u);
13241320

13251321
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
13261322
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas
7474
csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1;
7575
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
7676
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
77+
csrSurfaceCount += mockCsr->getKernelArgsBufferAllocation() ? 1 : 0;
7778

7879
//we should have 3 heaps, tag allocation and csr command stream + cq
7980
EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size());
@@ -809,6 +810,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenTotalRes
809810
}
810811

811812
mockCsr->initializeTagAllocation();
813+
mockCsr->createKernelArgsBufferAllocation();
812814
mockCsr->useNewResourceImplicitFlush = false;
813815
mockCsr->useGpuIdleImplicitFlush = false;
814816
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -341,8 +341,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
341341
false,
342342
MemoryCompressionState::NotApplicable,
343343
false,
344-
1u,
345-
nullptr);
344+
1u);
346345

347346
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
348347
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());
@@ -810,6 +809,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi
810809
csrSurfaceCount -= pDevice->getHardwareInfo().capabilityTable.supportsImages ? 0 : 1;
811810
csrSurfaceCount += mockCsr->globalFenceAllocation ? 1 : 0;
812811
csrSurfaceCount += mockCsr->clearColorAllocation ? 1 : 0;
812+
csrSurfaceCount += mockCsr->getKernelArgsBufferAllocation() ? 1 : 0;
813813

814814
EXPECT_EQ(4u + csrSurfaceCount, cmdBuffer->surfaces.size());
815815

0 commit comments

Comments
 (0)