Skip to content

Commit 3ed8b43

Browse files
Use primary batch buffer start when immediate command list uses flush task
Related-To: NEO-7091
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent fffd56d commit 3ed8b43

File tree

8 files changed

+244
-82
lines changed

8 files changed

+244
-82
lines changed

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &n
350350
0,
351351
0,
352352
!(cmdListType == CommandListType::TYPE_IMMEDIATE),
353-
true);
353+
!this->isFlushTaskSubmissionEnabled);
354354
}
355355

356356
template <GFXCORE_FAMILY gfxCoreFamily>

level_zero/core/test/unit_tests/fixtures/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ set(L0_FIXTURES_SOURCES
1515
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h
1616
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.h
1717
${CMAKE_CURRENT_SOURCE_DIR}/memory_ipc_fixture.h
18+
${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.cpp
19+
${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.h
1820
)
1921

2022
add_library(${TARGET_NAME} OBJECT ${L0_FIXTURES_SOURCES} ${NEO_CORE_tests_compiler_mocks})
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright (C) 2022 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
9+
10+
#include "level_zero/core/source/context/context_imp.h"
11+
12+
namespace L0 {
13+
namespace ult {
14+
15+
void MultiTileCommandListAppendLaunchFunctionFixture::SetUp() {
16+
DebugManager.flags.EnableImplicitScaling.set(1);
17+
18+
MultiDeviceFixture::numRootDevices = 1u;
19+
MultiDeviceFixture::numSubDevices = 4u;
20+
21+
MultiDeviceModuleFixture::SetUp();
22+
createModuleFromBinary(0u);
23+
createKernel(0u);
24+
25+
device = driverHandle->devices[0];
26+
27+
ze_context_handle_t hContext;
28+
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
29+
ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
30+
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
31+
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
32+
33+
ze_result_t returnValue;
34+
commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
35+
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
36+
}
37+
38+
void MultiTileCommandListAppendLaunchFunctionFixture::TearDown() {
39+
commandList->destroy();
40+
contextImp->destroy();
41+
42+
MultiDeviceModuleFixture::TearDown();
43+
}
44+
45+
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::SetUp() {
46+
DebugManager.flags.EnableImplicitScaling.set(1);
47+
48+
MultiDeviceFixture::numRootDevices = 1u;
49+
MultiDeviceFixture::numSubDevices = 2u;
50+
51+
MultiDeviceModuleFixture::SetUp();
52+
createModuleFromBinary(0u);
53+
createKernel(0u);
54+
55+
device = driverHandle->devices[0];
56+
57+
ze_context_handle_t hContext;
58+
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
59+
ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
60+
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
61+
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
62+
}
63+
64+
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::TearDown() {
65+
contextImp->destroy();
66+
67+
MultiDeviceModuleFixture::TearDown();
68+
}
69+
70+
} // namespace ult
71+
} // namespace L0
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright (C) 2022 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#pragma once
9+
#include "shared/test/common/helpers/variable_backup.h"
10+
11+
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
12+
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
13+
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
14+
15+
namespace L0 {
16+
struct ContextImp;
17+
struct Device;
18+
19+
namespace ult {
20+
21+
struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
22+
void SetUp();
23+
void TearDown();
24+
25+
ContextImp *contextImp = nullptr;
26+
WhiteBox<::L0::CommandList> *commandList = nullptr;
27+
L0::Device *device = nullptr;
28+
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
29+
};
30+
31+
struct MultiTileImmediateCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
32+
void SetUp();
33+
void TearDown();
34+
35+
ContextImp *contextImp = nullptr;
36+
L0::Device *device = nullptr;
37+
VariableBackup<bool> backupApiSupport{&NEO::ImplicitScaling::apiSupport, true};
38+
VariableBackup<bool> backupLocalMemory{&NEO::OSInterface::osEnableLocalMemory, true};
39+
};
40+
41+
} // namespace ult
42+
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo
8484
template <typename FamilyType>
8585
void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset,
8686
uint64_t gpuFinalSyncAddress, uint64_t gpuCrossTileSyncAddress, uint64_t gpuStartAddress,
87-
bool validateCleanupSection) {
87+
bool validateCleanupSection, bool secondaryBatchBuffer) {
8888
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
8989
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
9090
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
@@ -130,7 +130,11 @@ void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset,
130130
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(cmdBuffer, parsedOffset));
131131
ASSERT_NE(nullptr, bbStart);
132132
EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress());
133-
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
133+
if (secondaryBatchBuffer) {
134+
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
135+
} else {
136+
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
137+
}
134138
parsedOffset += sizeof(MI_BATCH_BUFFER_START);
135139
}
136140
{
@@ -236,7 +240,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControl
236240
void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore);
237241
size_t parsedOffset = 0;
238242

239-
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
243+
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
240244

241245
EXPECT_EQ(expectedUseBuffer, parsedOffset);
242246
}
@@ -298,7 +302,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
298302
void *cmdBuffer = cmdListStream->getCpuBase();
299303
size_t parsedOffset = 0;
300304

301-
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
305+
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
302306

303307
EXPECT_EQ(expectedUseBuffer, parsedOffset);
304308
}
@@ -363,7 +367,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
363367
void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore);
364368
size_t parsedOffset = 0;
365369

366-
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
370+
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
367371
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
368372

369373
cmdBuffer = ptrOffset(cmdBuffer, parsedOffset);
@@ -485,7 +489,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
485489
cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters);
486490
size_t parsedOffset = 0;
487491

488-
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
492+
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
489493
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
490494

491495
cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization));
@@ -517,6 +521,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
517521
auto immediateCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
518522
ASSERT_NE(nullptr, immediateCommandList);
519523
immediateCommandList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
524+
immediateCommandList->isFlushTaskSubmissionEnabled = true;
520525
ze_result_t returnValue = immediateCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
521526
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
522527
EXPECT_EQ(2u, immediateCommandList->partitionCount);
@@ -586,7 +591,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
586591
ASSERT_NE(cmdList.end(), itorBbStart);
587592
auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
588593
EXPECT_EQ(bbStartGpuAddress, cmdBbStart->getBatchBufferStartAddress());
589-
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
594+
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
590595

591596
auto atomicCounter = reinterpret_cast<uint32_t *>(ptrOffset(cmdBbStart, sizeof(MI_BATCH_BUFFER_START)));
592597
EXPECT_EQ(0u, *atomicCounter);
@@ -599,9 +604,42 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
599604
void *cmdBuffer = ptrOffset(cmdStream->getCpuBase(), usedBeforeSize);
600605
size_t parsedOffset = 0;
601606

602-
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, 0, crossTileSyncGpuAddress, bbStartGpuAddress, false);
607+
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, 0, crossTileSyncGpuAddress, bbStartGpuAddress, false, false);
603608
EXPECT_EQ(expectedSize, parsedOffset);
604609
}
605610

611+
// An immediate command list with flush-task submission disabled must emit the
// barrier's MI_BATCH_BUFFER_START as a second-level batch buffer jump.
HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
          givenMultiTileImmediateCommandListNotUsingFlushTaskWhenAppendingBarrierThenExpectSecondaryBufferStart, IsWithinXeGfxFamily) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    auto immediateCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, immediateCommandList);
    immediateCommandList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
    immediateCommandList->isFlushTaskSubmissionEnabled = false;

    // Everything below relies on an initialized command list, so a failed
    // initialize() has to stop the test instead of being merely recorded.
    ze_result_t returnValue = immediateCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(2u, immediateCommandList->partitionCount);

    auto cmdStream = immediateCommandList->commandContainer.getCommandStream();

    size_t usedBeforeSize = cmdStream->getUsed();

    returnValue = immediateCommandList->appendBarrier(nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    size_t usedAfterSize = cmdStream->getUsed();

    // Parse only the commands appended by appendBarrier.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), usedBeforeSize),
        (usedAfterSize - usedBeforeSize)));

    auto itorBbStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBbStart);
    auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
    ASSERT_NE(nullptr, cmdBbStart);
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
}
643+
606644
} // namespace ult
607645
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp

Lines changed: 34 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "level_zero/core/source/event/event.h"
1919
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
20+
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
2021
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
2122
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
2223

@@ -1275,44 +1276,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
12751276
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
12761277
}
12771278

1278-
struct MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture : public MultiDeviceModuleFixture {
1279-
void SetUp() {
1280-
DebugManager.flags.EnableImplicitScaling.set(1);
1281-
1282-
MultiDeviceFixture::numRootDevices = 1u;
1283-
MultiDeviceFixture::numSubDevices = 4u;
1284-
1285-
MultiDeviceModuleFixture::SetUp();
1286-
createModuleFromBinary(0u);
1287-
createKernel(0u);
1288-
1289-
device = driverHandle->devices[0];
1290-
1291-
ze_context_handle_t hContext;
1292-
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
1293-
ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
1294-
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
1295-
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
1296-
1297-
ze_result_t returnValue;
1298-
commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
1299-
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
1300-
}
1301-
1302-
void TearDown() {
1303-
commandList->destroy();
1304-
contextImp->destroy();
1305-
1306-
MultiDeviceModuleFixture::TearDown();
1307-
}
1308-
1309-
ContextImp *contextImp = nullptr;
1310-
WhiteBox<::L0::CommandList> *commandList = nullptr;
1311-
L0::Device *device = nullptr;
1312-
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
1313-
};
1314-
1315-
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture>;
1279+
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionFixture>;
13161280

13171281
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
13181282
ze_event_pool_desc_t eventPoolDesc = {};
@@ -1378,5 +1342,37 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
13781342
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
13791343
}
13801344

1345+
// With UsePipeControlAfterPartitionedWalker set, appending a kernel to the
// fixture's command list must produce a partitioned walker and an
// MI_BATCH_BUFFER_START programmed as a second-level batch buffer jump.
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest,
          givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1);

    ze_group_count_t groupCount{128, 1, 1};

    auto cmdStream = commandList->commandContainer.getCommandStream();

    auto sizeBefore = cmdStream->getUsed();
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel.get(), &groupCount, nullptr, 0, nullptr, launchParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto sizeAfter = cmdStream->getUsed();

    // Parse only the commands appended by appendLaunchKernel.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), sizeBefore),
        sizeAfter - sizeBefore));

    // The iterator must be validated before it is dereferenced; otherwise a
    // missing walker turns a test failure into undefined behavior.
    auto itorWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker);
    auto cmd = genCmdCast<WALKER_TYPE *>(*itorWalker);
    ASSERT_NE(nullptr, cmd);
    EXPECT_TRUE(cmd->getWorkloadPartitionEnable());

    auto itorBbStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBbStart);
    auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
    ASSERT_NE(nullptr, cmdBbStart);
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
}
1376+
13811377
} // namespace ult
13821378
} // namespace L0

0 commit comments

Comments
 (0)