Skip to content

Commit 71ff85c

Browse files
Complete kernel prior to timestamp capture
While at it, fix event flag check upon creation.

Change-Id: I2c57b0e20141fc38a4de695cae79bf2f14dd6cdd
Signed-off-by: Aravind Gopalakrishnan <[email protected]>
1 parent bd24350 commit 71ff85c

File tree

6 files changed

+224
-26
lines changed

6 files changed

+224
-26
lines changed

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,12 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
9292

9393
commandContainer.addToResidencyContainer(&event->getAllocation());
9494
if (beforeWalker) {
95-
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW);
95+
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START);
9696
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, timeStampAddress);
9797

98-
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH);
99-
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_UN, timeStampAddress);
100-
10198
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
10299
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
103100
} else {
104-
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
105-
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
106101

107102
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
108103
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
@@ -116,6 +111,13 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
116111
0llu,
117112
dcFlushEnable,
118113
device->getHwInfo());
114+
115+
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
116+
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
117+
118+
if (dcFlushEnable) {
119+
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
120+
}
119121
}
120122
}
121123
}

level_zero/core/source/event/event.cpp

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ struct EventImp : public Event {
6767
EventPool *eventPool;
6868

6969
protected:
70-
ze_result_t hostEventSetValue(uint64_t eventValue);
71-
ze_result_t hostEventSetValueTimestamps(uint64_t eventVal);
70+
ze_result_t hostEventSetValue(uint32_t eventValue);
71+
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
7272
void makeAllocationResident();
7373
};
7474

@@ -81,7 +81,7 @@ struct EventPoolImp : public EventPool {
8181
}
8282

8383
auto timestampMultiplier = 1;
84-
if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) {
84+
if (flags & ZE_EVENT_POOL_FLAG_TIMESTAMP) {
8585
isEventPoolUsedForTimestamp = true;
8686
timestampMultiplier = numEventTimestampsToRead;
8787
}
@@ -154,7 +154,7 @@ struct EventPoolImp : public EventPool {
154154
protected:
155155
const uint32_t eventSize = 16u;
156156
const uint32_t eventAlignment = MemoryConstants::cacheLineSize;
157-
const int32_t numEventTimestampsToRead = 5u;
157+
const int32_t numEventTimestampsToRead = 4u;
158158
};
159159

160160
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
@@ -206,7 +206,7 @@ void EventImp::makeAllocationResident() {
206206
}
207207
}
208208

209-
ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) {
209+
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
210210
for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) {
211211
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
212212
auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i);
@@ -224,7 +224,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) {
224224
return ZE_RESULT_SUCCESS;
225225
}
226226

227-
ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) {
227+
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
228228
if (isTimestampEvent) {
229229
hostEventSetValueTimestamps(eventVal);
230230
}
@@ -292,8 +292,8 @@ ze_result_t EventImp::reset() {
292292
ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) {
293293
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
294294
uint64_t *tsptr = nullptr;
295-
uint64_t tsData = Event::STATE_INITIAL;
296295
constexpr uint64_t tsMask = (1ull << 32) - 1;
296+
uint64_t tsData = Event::STATE_INITIAL & tsMask;
297297

298298
if (!this->isTimestampEvent)
299299
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
@@ -305,15 +305,8 @@ ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void
305305
}
306306

307307
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) {
308-
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW));
309-
auto tsptrUpper = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH));
310-
311-
tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask);
312-
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
313-
return ZE_RESULT_SUCCESS;
314-
}
315-
316-
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) {
308+
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START));
309+
} else if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) {
317310
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END));
318311
} else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) {
319312
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_START));

level_zero/core/source/event/event.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,14 @@ struct Event : _ze_event_handle_t {
3030
virtual ze_result_t reset() = 0;
3131
virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0;
3232

33-
enum State : uint64_t {
33+
enum State : uint32_t {
3434
STATE_SIGNALED = 0u,
35-
STATE_CLEARED = static_cast<uint64_t>(-1),
35+
STATE_CLEARED = static_cast<uint32_t>(-1),
3636
STATE_INITIAL = STATE_CLEARED
3737
};
3838

3939
enum EventTimestampRegister : uint32_t {
40-
GLOBAL_START_LOW = 0u,
41-
GLOBAL_START_HIGH,
40+
GLOBAL_START = 0u,
4241
GLOBAL_END,
4342
CONTEXT_START,
4443
CONTEXT_END

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "shared/source/command_container/command_encoder.h"
99
#include "shared/source/helpers/preamble.h"
10+
#include "shared/source/helpers/register_offsets.h"
1011
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
1112

1213
#include "opencl/source/helpers/hardware_commands_helper.h"
@@ -232,6 +233,94 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppend
232233
}
233234
}
234235

236+
using TimestampEventSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
237+
HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernelThenSRMAndPCEncoded, TimestampEventSupport) {
238+
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
239+
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
240+
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
241+
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
242+
243+
Mock<::L0::Kernel> kernel;
244+
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, false));
245+
auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed();
246+
ze_event_pool_desc_t eventPoolDesc = {
247+
ZE_EVENT_POOL_DESC_VERSION_CURRENT,
248+
ZE_EVENT_POOL_FLAG_TIMESTAMP,
249+
1};
250+
251+
ze_event_desc_t eventDesc = {
252+
ZE_EVENT_DESC_VERSION_CURRENT,
253+
0,
254+
ZE_EVENT_SCOPE_FLAG_NONE,
255+
ZE_EVENT_SCOPE_FLAG_NONE};
256+
257+
auto eventPool = std::unique_ptr<EventPool>(EventPool::create(device, &eventPoolDesc));
258+
auto event = std::unique_ptr<Event>(Event::create(eventPool.get(), &eventDesc, device));
259+
260+
ze_group_count_t groupCount{1, 1, 1};
261+
auto result = commandList->appendLaunchKernel(
262+
kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr);
263+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
264+
265+
auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed();
266+
EXPECT_GT(usedSpaceAfter, usedSpaceBefore);
267+
268+
GenCmdList cmdList;
269+
EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer(
270+
cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
271+
272+
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
273+
ASSERT_NE(cmdList.end(), itor);
274+
{
275+
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
276+
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getRegisterAddress());
277+
}
278+
itor++;
279+
280+
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
281+
ASSERT_NE(cmdList.end(), itor);
282+
{
283+
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
284+
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress());
285+
}
286+
itor++;
287+
288+
itor = find<GPGPU_WALKER *>(cmdList.begin(), cmdList.end());
289+
ASSERT_NE(cmdList.end(), itor);
290+
itor++;
291+
292+
auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
293+
EXPECT_NE(0u, itorPC.size());
294+
bool postSyncFound = false;
295+
for (auto it : itorPC) {
296+
auto cmd = genCmdCast<PIPE_CONTROL *>(*it);
297+
if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
298+
EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
299+
EXPECT_FALSE(cmd->getDcFlushEnable());
300+
auto gpuAddress = event->getGpuAddress() +
301+
event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
302+
EXPECT_EQ(cmd->getAddressHigh(), gpuAddress >> 32u);
303+
EXPECT_EQ(cmd->getAddress(), uint32_t(gpuAddress));
304+
postSyncFound = true;
305+
}
306+
}
307+
EXPECT_TRUE(postSyncFound);
308+
309+
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
310+
EXPECT_NE(cmdList.end(), itor);
311+
{
312+
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
313+
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress());
314+
}
315+
316+
{
317+
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
318+
std::end(commandList->commandContainer.getResidencyContainer()),
319+
&event->getAllocation());
320+
EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
321+
}
322+
}
323+
235324
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelThenKernelIsExecutedOnImmediateCmdQ, SklPlusMatcher) {
236325
createKernel();
237326

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#
2+
# Copyright (C) 2020 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
#
6+
7+
target_sources(${TARGET_NAME} PRIVATE
8+
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
9+
${CMAKE_CURRENT_SOURCE_DIR}/test_event.cpp
10+
)
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright (C) 2020 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
*/
7+
8+
#include "test.h"
9+
10+
#include "level_zero/core/source/driver/driver_handle_imp.h"
11+
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
12+
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
13+
14+
namespace L0 {
15+
namespace ult {
16+
17+
using EventPoolCreate = Test<DeviceFixture>;
18+
19+
// A pool created with the HOST_VISIBLE flag must be backed by an allocation
// whose underlying buffer is at least as large as the pool's event size.
TEST_F(EventPoolCreate, allocationContainsAtLeast16Bytes) {
    ze_event_pool_desc_t poolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
        1};

    auto pool = std::unique_ptr<L0::EventPool>(EventPool::create(device, &poolDesc));
    ASSERT_NE(nullptr, pool);

    auto poolAllocation = &pool->getAllocation();
    ASSERT_NE(nullptr, poolAllocation);

    uint32_t requiredSize = pool->getEventSize();
    EXPECT_GE(poolAllocation->getUnderlyingBufferSize(), requiredSize);
}
34+
35+
// A pool created with the TIMESTAMP flag must report four timestamps to read
// per event.
TEST_F(EventPoolCreate, givenTimestampEventsThenVerifyNumTimestampsToRead) {
    ze_event_pool_desc_t poolDesc = {
        ZE_EVENT_POOL_DESC_VERSION_CURRENT,
        ZE_EVENT_POOL_FLAG_TIMESTAMP, // pool is created for timestamp events
        1};

    auto pool = std::unique_ptr<L0::EventPool>(EventPool::create(device, &poolDesc));
    ASSERT_NE(nullptr, pool);

    uint32_t expectedTimestampCount = 4u;
    EXPECT_EQ(expectedTimestampCount, pool->getNumEventTimestampsToRead());
}
47+
48+
// Fixture that sets up the device fixture, then creates an event pool with the
// TIMESTAMP flag and one event from that pool. Tests access them through the
// `eventPool` and `event` members.
class TimestampEventCreate : public Test<DeviceFixture> {
  public:
    void SetUp() override {
        DeviceFixture::SetUp();
        ze_event_pool_desc_t eventPoolDesc = {
            ZE_EVENT_POOL_DESC_VERSION_CURRENT,
            ZE_EVENT_POOL_FLAG_TIMESTAMP,
            1};

        ze_event_desc_t eventDesc = {
            ZE_EVENT_DESC_VERSION_CURRENT,
            0,
            ZE_EVENT_SCOPE_FLAG_NONE,
            ZE_EVENT_SCOPE_FLAG_NONE};

        eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(device, &eventPoolDesc));
        ASSERT_NE(nullptr, eventPool);
        event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
        // Validate the freshly created event (not the pool a second time) so
        // that tests never dereference a null event.
        ASSERT_NE(nullptr, event);
    }

    void TearDown() override {
        // Release the event, then its pool, before the device fixture is torn
        // down. NOTE(review): presumably DeviceFixture::TearDown() releases the
        // device these objects were created from - confirm against the fixture.
        event.reset();
        eventPool.reset();
        DeviceFixture::TearDown();
    }

    std::unique_ptr<L0::EventPool> eventPool;
    std::unique_ptr<L0::Event> event;
};
76+
77+
// An event created from a TIMESTAMP pool must have its isTimestampEvent flag set.
TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventFlagSet) {
    const bool flaggedAsTimestamp = event->isTimestampEvent;
    EXPECT_TRUE(flaggedAsTimestamp);
}
80+
81+
// Reads all four timestamps of an event that has not been signaled and expects
// each of them to equal Event::STATE_CLEARED.
TEST_F(TimestampEventCreate, givenEventTimestampsNotTriggeredThenValuesInInitialState) {
    // Start from zero, which differs from STATE_CLEARED, so the expectations
    // below never read an indeterminate value and cannot be satisfied by an
    // output that getTimestamp() did not write.
    uint64_t globalStart = 0u;
    uint64_t globalEnd = 0u;
    uint64_t contextStart = 0u;
    uint64_t contextEnd = 0u;

    event->getTimestamp(ZE_EVENT_TIMESTAMP_GLOBAL_START, &globalStart);
    event->getTimestamp(ZE_EVENT_TIMESTAMP_GLOBAL_END, &globalEnd);
    event->getTimestamp(ZE_EVENT_TIMESTAMP_CONTEXT_START, &contextStart);
    event->getTimestamp(ZE_EVENT_TIMESTAMP_CONTEXT_END, &contextEnd);

    EXPECT_EQ(static_cast<uint64_t>(Event::STATE_CLEARED), globalStart);
    EXPECT_EQ(static_cast<uint64_t>(Event::STATE_CLEARED), globalEnd);
    EXPECT_EQ(static_cast<uint64_t>(Event::STATE_CLEARED), contextStart);
    EXPECT_EQ(static_cast<uint64_t>(Event::STATE_CLEARED), contextEnd);
}
94+
95+
// The pool allocation must be large enough to hold every timestamp of the
// event: event size multiplied by the number of timestamps to read.
TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) {
    auto allocation = &eventPool->getAllocation();
    ASSERT_NE(nullptr, allocation);

    uint32_t minTimestampEventAllocation = eventPool->getEventSize() *
                                           eventPool->getNumEventTimestampsToRead();
    // EXPECT_GE(a, b) checks a >= b: the allocation is the value that must be
    // at least the minimum, matching the operand order used by
    // EventPoolCreate.allocationContainsAtLeast16Bytes.
    EXPECT_GE(allocation->getUnderlyingBufferSize(), minTimestampEventAllocation);
}
103+
104+
} // namespace ult
105+
} // namespace L0

0 commit comments

Comments
 (0)