Skip to content

Commit 64d772d

Browse files
Fix for adding MI_SEMAPHORE_WAIT & reset L0 Event
Signed-off-by: Daria Hinz <[email protected]>
1 parent 0b2022c commit 64d772d

File tree

11 files changed (+320 / -102 lines changed)

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 27 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -635,22 +635,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
635635
return ZE_RESULT_ERROR_UNKNOWN;
636636
}
637637

638-
template <GFXCORE_FAMILY gfxCoreFamily>
639-
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
640-
const ze_group_count_t *pThreadGroupDimensions,
641-
ze_event_handle_t hEvent) {
642-
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false);
643-
}
644-
645-
template <GFXCORE_FAMILY gfxCoreFamily>
646-
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
647-
if (beforeWalker) {
648-
appendEventForProfiling(hEvent, true);
649-
} else {
650-
appendSignalEventPostWalker(hEvent);
651-
}
652-
}
653-
654638
template <GFXCORE_FAMILY gfxCoreFamily>
655639
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
656640
NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1464,6 +1448,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
14641448
commandContainer.getDevice()->getHardwareInfo(),
14651449
args);
14661450
}
1451+
14671452
return ZE_RESULT_SUCCESS;
14681453
}
14691454

@@ -1473,25 +1458,30 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
14731458
auto event = Event::fromHandle(hEvent);
14741459

14751460
uint64_t baseAddr = event->getGpuAddress();
1476-
size_t eventOffset = 0;
1461+
uint32_t packetsToReset = 1;
1462+
14771463
if (event->isTimestampEvent) {
1478-
eventOffset = offsetof(TimestampPacketStorage::Packet, contextEnd);
1464+
baseAddr += offsetof(TimestampPacketStorage::Packet, contextEnd);
1465+
packetsToReset = event->getPacketsInUse() ? event->getPacketsInUse() : NEO::TimestampPacketSizeControl::preferredPacketCount;
14791466
}
14801467
commandContainer.addToResidencyContainer(&event->getAllocation());
14811468
if (isCopyOnly()) {
1482-
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_CLEARED, false, true);
1469+
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_CLEARED, false, true);
14831470
} else {
14841471
NEO::PipeControlArgs args;
14851472
args.dcFlushEnable = (!event->signalScope) ? false : true;
1486-
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
1487-
*commandContainer.getCommandStream(),
1488-
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
1489-
ptrOffset(baseAddr, eventOffset),
1490-
Event::STATE_CLEARED,
1491-
commandContainer.getDevice()->getHardwareInfo(),
1492-
args);
1473+
for (uint32_t i = 0u; i < packetsToReset; i++) {
1474+
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
1475+
*commandContainer.getCommandStream(),
1476+
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
1477+
baseAddr,
1478+
Event::STATE_CLEARED,
1479+
commandContainer.getDevice()->getHardwareInfo(),
1480+
args);
1481+
baseAddr += sizeof(struct TimestampPacketStorage::Packet);
1482+
}
14931483
}
1494-
1484+
event->resetPackets();
14951485
return ZE_RESULT_SUCCESS;
14961486
}
14971487

@@ -1524,15 +1514,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
15241514
for (uint32_t i = 0; i < numEvents; i++) {
15251515
auto event = Event::fromHandle(phEvent[i]);
15261516
commandContainer.addToResidencyContainer(&event->getAllocation());
1527-
15281517
gpuAddr = event->getGpuAddress();
1518+
uint32_t packetsToWait = event->getPacketsInUse() ? event->getPacketsInUse() : 1;
1519+
15291520
if (event->isTimestampEvent) {
15301521
gpuAddr += offsetof(TimestampPacketStorage::Packet, contextEnd);
15311522
}
1532-
NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
1533-
gpuAddr,
1534-
eventStateClear,
1535-
COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
1523+
for (uint32_t i = 0u; i < packetsToWait; i++) {
1524+
NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
1525+
gpuAddr,
1526+
eventStateClear,
1527+
COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
1528+
1529+
gpuAddr += sizeof(struct TimestampPacketStorage::Packet);
1530+
}
15361531
}
15371532

15381533
return ZE_RESULT_SUCCESS;

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
3131
return helper.getRenderSurfaceStateSize();
3232
}
3333

34+
template <GFXCORE_FAMILY gfxCoreFamily>
35+
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
36+
const ze_group_count_t *pThreadGroupDimensions,
37+
ze_event_handle_t hEvent) {
38+
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false);
39+
}
40+
41+
template <GFXCORE_FAMILY gfxCoreFamily>
42+
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
43+
if (beforeWalker) {
44+
appendEventForProfiling(hEvent, true);
45+
} else {
46+
appendSignalEventPostWalker(hEvent);
47+
}
48+
}
49+
3450
template <GFXCORE_FAMILY gfxCoreFamily>
3551
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
3652
const ze_group_count_t *pThreadGroupDimensions,
@@ -86,6 +102,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
86102
isPredicate,
87103
kernel,
88104
0,
105+
false,
89106
device->getNEODevice(),
90107
commandListPreemptionMode,
91108
this->containsStatelessUncachedResource,

level_zero/core/source/event/event.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,17 +241,29 @@ ze_result_t Event::destroy() {
241241
return ZE_RESULT_SUCCESS;
242242
}
243243

244+
ze_result_t EventImp::queryStatusKernelTimestamp() {
245+
assignTimestampData(hostAddress);
246+
uint32_t packetsToCheck = packetsInUse ? packetsInUse : 1;
247+
248+
for (uint32_t i = 0; i < packetsToCheck; i++) {
249+
auto &packet = timestampsData->packets[i];
250+
if (packet.contextEnd == Event::STATE_CLEARED) {
251+
return ZE_RESULT_NOT_READY;
252+
}
253+
}
254+
return ZE_RESULT_SUCCESS;
255+
}
256+
244257
ze_result_t EventImp::queryStatus() {
245258
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
246259
uint32_t queryVal = Event::STATE_CLEARED;
260+
247261
if (metricStreamer != nullptr) {
248262
*hostAddr = metricStreamer->getNotificationState();
249263
}
250264
this->csr->downloadAllocations();
251265
if (isTimestampEvent) {
252-
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
253-
auto timeStampAddress = baseAddr + offsetof(TimestampPacketStorage::Packet, contextEnd);
254-
hostAddr = reinterpret_cast<uint64_t *>(timeStampAddress);
266+
return queryStatusKernelTimestamp();
255267
}
256268
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
257269
return queryVal == Event::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
@@ -343,7 +355,6 @@ ze_result_t EventImp::reset() {
343355
if (allocOnDevice) {
344356
return ZE_RESULT_SUCCESS;
345357
}
346-
347358
resetPackets();
348359
return hostEventSetValue(Event::STATE_INITIAL);
349360
}

level_zero/core/source/event/event.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ struct Event : _ze_event_handle_t {
5353
virtual uint64_t getGpuAddress();
5454

5555
void *hostAddress = nullptr;
56-
uint32_t packetsInUse;
56+
uint32_t packetsInUse = 0;
5757
uint64_t gpuAddress = 0u;
5858

5959
ze_event_scope_flags_t signalScope = 0u;
@@ -96,6 +96,7 @@ struct EventImp : public Event {
9696

9797
protected:
9898
ze_result_t calculateProfilingData();
99+
ze_result_t queryStatusKernelTimestamp();
99100
ze_result_t hostEventSetValue(uint32_t eventValue);
100101
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
101102
void assignTimestampData(void *address);

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,51 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopy
11221122
EXPECT_EQ(cmdList.end(), itor);
11231123
}
11241124

1125+
HWTEST2_F(CommandListCreate, givenEventWhenInvokingAppendMemoryCopyThenPostSyncIsAdded, Platforms) {
1126+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
1127+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
1128+
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
1129+
1130+
ze_result_t result = ZE_RESULT_SUCCESS;
1131+
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, result));
1132+
1133+
void *srcPtr = reinterpret_cast<void *>(0x1234);
1134+
void *dstPtr = reinterpret_cast<void *>(0x2345);
1135+
1136+
ze_event_pool_desc_t eventPoolDesc = {};
1137+
eventPoolDesc.count = 1;
1138+
1139+
ze_event_desc_t eventDesc = {};
1140+
eventDesc.index = 0;
1141+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
1142+
1143+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
1144+
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
1145+
1146+
result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, event->toHandle(), 0, nullptr);
1147+
EXPECT_EQ(0u, event->getPacketsInUse());
1148+
1149+
GenCmdList cmdList;
1150+
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
1151+
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed()));
1152+
1153+
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
1154+
ASSERT_NE(0u, itorPC.size());
1155+
uint32_t postSyncFound = 0;
1156+
for (auto it : itorPC) {
1157+
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
1158+
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
1159+
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
1160+
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
1161+
auto gpuAddress = event->getGpuAddress();
1162+
EXPECT_EQ(cmd->getAddressHigh(), gpuAddress >> 32u);
1163+
EXPECT_EQ(cmd->getAddress(), uint32_t(gpuAddress));
1164+
postSyncFound++;
1165+
}
1166+
}
1167+
EXPECT_EQ(1u, postSyncFound);
1168+
}
1169+
11251170
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
11261171
HWTEST2_F(CommandListCreate, givenCommandListThenSshCorrectlyReserved, SupportedPlatforms) {
11271172
MockCommandListHw<gfxCoreFamily> commandList;
@@ -1239,5 +1284,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
12391284
EXPECT_TRUE(cmd->getDcFlushEnable());
12401285
}
12411286
}
1287+
12421288
} // namespace ult
12431289
} // namespace L0

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
147147

148148
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
149149
ASSERT_NE(0u, itorPC.size());
150-
bool postSyncFound = false;
150+
uint32_t postSyncFound = 0u;
151151
for (auto it : itorPC) {
152152
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
153153
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
@@ -156,10 +156,54 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon
156156
EXPECT_EQ(cmd->getAddressHigh(), gpuAddress >> 32u);
157157
EXPECT_EQ(cmd->getAddress(), uint32_t(gpuAddress));
158158
EXPECT_FALSE(cmd->getDcFlushEnable());
159-
postSyncFound = true;
159+
postSyncFound++;
160+
gpuAddress += sizeof(struct TimestampPacketStorage::Packet);
160161
}
161162
}
162-
ASSERT_TRUE(postSyncFound);
163+
ASSERT_EQ(NEO::TimestampPacketSizeControl::preferredPacketCount, postSyncFound);
164+
}
165+
166+
HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetWhenMoreThanOnePacketThenPipeControlAppendedCorrectly, SklPlusMatcher) {
167+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
168+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
169+
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
170+
auto &commandContainer = commandList->commandContainer;
171+
172+
ze_event_pool_desc_t eventPoolDesc = {};
173+
eventPoolDesc.count = 1;
174+
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
175+
176+
ze_event_desc_t eventDesc = {};
177+
eventDesc.index = 0;
178+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
179+
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
180+
181+
event->packetsInUse = 3;
182+
commandList->appendEventReset(event->toHandle());
183+
184+
ASSERT_EQ(0u, event->getPacketsInUse());
185+
auto gpuAddress = event->getGpuAddress() + offsetof(TimestampPacketStorage::Packet, contextEnd);
186+
187+
GenCmdList cmdList;
188+
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
189+
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
190+
191+
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
192+
ASSERT_NE(0u, itorPC.size());
193+
uint32_t postSyncFound = 0;
194+
for (auto it : itorPC) {
195+
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
196+
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
197+
EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED);
198+
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
199+
EXPECT_EQ(cmd->getAddressHigh(), gpuAddress >> 32u);
200+
EXPECT_EQ(cmd->getAddress(), uint32_t(gpuAddress));
201+
EXPECT_FALSE(cmd->getDcFlushEnable());
202+
postSyncFound++;
203+
gpuAddress += sizeof(struct TimestampPacketStorage::Packet);
204+
}
205+
}
206+
ASSERT_EQ(3u, postSyncFound);
163207
}
164208

165209
HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPipeControlWithDcFlushAppended, SklPlusMatcher) {

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,53 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphic
101101
}
102102
}
103103

104+
HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThreePacketsThenSemaphoreWaitCmdIsGeneratedThreeTimes) {
105+
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
106+
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
107+
108+
ze_event_pool_desc_t eventPoolDesc = {};
109+
eventPoolDesc.count = 1;
110+
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
111+
112+
ze_event_desc_t eventDesc = {};
113+
eventDesc.index = 0;
114+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));
115+
auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
116+
117+
event->packetsInUse = 3;
118+
ze_event_handle_t hEventHandle = event->toHandle();
119+
auto result = commandList->appendWaitOnEvents(1, &hEventHandle);
120+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
121+
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
122+
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
123+
124+
auto gpuAddress = event->getGpuAddress() + offsetof(TimestampPacketStorage::Packet, contextEnd);
125+
126+
GenCmdList cmdList;
127+
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
128+
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
129+
usedSpaceAfter));
130+
131+
auto itorSW = findAll<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
132+
ASSERT_NE(0u, itorSW.size());
133+
uint32_t semaphoreWaitsFound = 0;
134+
135+
for (auto it : itorSW) {
136+
auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
137+
auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace;
138+
139+
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD,
140+
cmd->getCompareOperation());
141+
EXPECT_EQ(cmd->getSemaphoreDataDword(), static_cast<uint32_t>(-1));
142+
EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace);
143+
EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode());
144+
145+
semaphoreWaitsFound++;
146+
gpuAddress += sizeof(struct TimestampPacketStorage::Packet);
147+
}
148+
ASSERT_EQ(3u, semaphoreWaitsFound);
149+
}
150+
104151
HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppendingWaitOnEventThenPCWithDcFlushIsGenerated) {
105152
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
106153
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();

0 commit comments

Comments
 (0)