Skip to content

Commit 424b214

Browse files
L0::Event to support dynamic size
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent a5d3817 commit 424b214

15 files changed: +406 / -363 lines changed

level_zero/core/source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ set(L0_RUNTIME_SOURCES
4444
${CMAKE_CURRENT_SOURCE_DIR}/driver/driver_imp.h
4545
${CMAKE_CURRENT_SOURCE_DIR}/driver/host_pointer_manager.cpp
4646
${CMAKE_CURRENT_SOURCE_DIR}/driver/host_pointer_manager.h
47+
${CMAKE_CURRENT_SOURCE_DIR}/event/event_impl.inl
4748
${CMAKE_CURRENT_SOURCE_DIR}/event/event.cpp
4849
${CMAKE_CURRENT_SOURCE_DIR}/event/event.h
4950
${CMAKE_CURRENT_SOURCE_DIR}/fence/fence.cpp

level_zero/core/source/event/event.cpp

Lines changed: 2 additions & 274 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include "level_zero/core/source/device/device.h"
2525
#include "level_zero/core/source/device/device_imp.h"
2626
#include "level_zero/core/source/driver/driver_handle_imp.h"
27+
#include "level_zero/core/source/event/event_impl.inl"
2728
#include "level_zero/tools/source/metrics/metric.h"
2829

2930
#include <set>
@@ -110,289 +111,16 @@ ze_result_t EventPoolImp::createEvent(const ze_event_desc_t *desc, ze_event_hand
110111
if (desc->index > (getNumEvents() - 1)) {
111112
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
112113
}
113-
*phEvent = Event::create(this, desc, this->getDevice());
114+
*phEvent = Event::create<uint32_t>(this, desc, this->getDevice());
114115

115116
return ZE_RESULT_SUCCESS;
116117
}
117118

118-
uint64_t EventImp::getGpuAddress(Device *device) {
119-
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
120-
return (alloc->getGpuAddress() + (index * eventPool->getEventSize()));
121-
}
122-
123-
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
124-
auto event = new EventImp(eventPool, desc->index, device);
125-
UNRECOVERABLE_IF(event == nullptr);
126-
127-
if (eventPool->isEventPoolUsedForTimestamp) {
128-
event->isTimestampEvent = true;
129-
event->kernelTimestampsData = std::make_unique<NEO::TimestampPackets<uint32_t>[]>(EventPacketsCount::maxKernelSplit);
130-
}
131-
132-
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
133-
134-
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
135-
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
136-
event->signalScope = desc->signal;
137-
event->waitScope = desc->wait;
138-
event->csr = static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver;
139-
event->reset();
140-
141-
return event;
142-
}
143-
144-
NEO::GraphicsAllocation &EventImp::getAllocation(Device *device) {
145-
return *this->eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
146-
}
147-
148-
void Event::resetPackets() {
149-
for (uint32_t i = 0; i < kernelCount; i++) {
150-
kernelTimestampsData[i].setPacketsUsed(1);
151-
}
152-
kernelCount = 1;
153-
}
154-
155-
uint32_t Event::getPacketsInUse() {
156-
if (isTimestampEvent) {
157-
uint32_t packetsInUse = 0;
158-
for (uint32_t i = 0; i < kernelCount; i++) {
159-
packetsInUse += kernelTimestampsData[i].getPacketsUsed();
160-
};
161-
return packetsInUse;
162-
} else {
163-
return 1;
164-
}
165-
}
166-
167-
void Event::setPacketsInUse(uint32_t value) {
168-
kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
169-
};
170-
171-
uint64_t Event::getPacketAddress(Device *device) {
172-
uint64_t address = getGpuAddress(device);
173-
if (isTimestampEvent && kernelCount > 1) {
174-
for (uint32_t i = 0; i < kernelCount - 1; i++) {
175-
address += kernelTimestampsData[i].getPacketsUsed() *
176-
NEO::TimestampPackets<uint32_t>::getSinglePacketSize();
177-
}
178-
}
179-
return address;
180-
}
181-
182-
ze_result_t EventImp::calculateProfilingData() {
183-
globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
184-
globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
185-
contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
186-
contextEndTS = kernelTimestampsData[0].getContextEndValue(0);
187-
188-
for (uint32_t i = 0; i < kernelCount; i++) {
189-
for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
190-
if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
191-
globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
192-
}
193-
if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
194-
contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
195-
}
196-
if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
197-
contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
198-
}
199-
if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
200-
globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
201-
}
202-
}
203-
}
204-
205-
return ZE_RESULT_SUCCESS;
206-
}
207-
208-
void EventImp::assignTimestampData(void *address) {
209-
for (uint32_t i = 0; i < kernelCount; i++) {
210-
uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
211-
for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
212-
kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
213-
address = ptrOffset(address, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
214-
}
215-
}
216-
}
217-
218119
ze_result_t Event::destroy() {
219120
delete this;
220121
return ZE_RESULT_SUCCESS;
221122
}
222123

223-
ze_result_t EventImp::queryStatusKernelTimestamp() {
224-
assignTimestampData(hostAddress);
225-
for (uint32_t i = 0; i < kernelCount; i++) {
226-
uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
227-
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
228-
if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
229-
return ZE_RESULT_NOT_READY;
230-
}
231-
}
232-
}
233-
return ZE_RESULT_SUCCESS;
234-
}
235-
236-
ze_result_t EventImp::queryStatus() {
237-
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
238-
uint32_t queryVal = Event::STATE_CLEARED;
239-
ze_result_t retVal;
240-
241-
if (metricStreamer != nullptr) {
242-
*hostAddr = metricStreamer->getNotificationState();
243-
}
244-
this->csr->downloadAllocations();
245-
if (isTimestampEvent) {
246-
return queryStatusKernelTimestamp();
247-
}
248-
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
249-
retVal = (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
250-
251-
if (retVal == ZE_RESULT_NOT_READY) {
252-
return retVal;
253-
}
254-
255-
if (updateTaskCountEnabled) {
256-
this->csr->flushTagUpdate();
257-
updateTaskCountEnabled = false;
258-
}
259-
260-
return retVal;
261-
}
262-
263-
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
264-
265-
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
266-
auto signalScopeFlag = this->signalScope;
267-
268-
auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) {
269-
auto tsptr = reinterpret_cast<void *>(tsAddr);
270-
271-
memcpy_s(tsptr, sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));
272-
if (!signalScopeFlag) {
273-
NEO::CpuIntrinsics::clFlush(tsptr);
274-
}
275-
};
276-
for (uint32_t i = 0; i < kernelCount; i++) {
277-
uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed();
278-
for (uint32_t i = 0; i < packetsToSet; i++) {
279-
eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextStartOffset());
280-
eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalStartOffset());
281-
eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextEndOffset());
282-
eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalEndOffset());
283-
baseAddr += NEO::TimestampPackets<uint32_t>::getSinglePacketSize();
284-
}
285-
}
286-
assignTimestampData(hostAddress);
287-
288-
return ZE_RESULT_SUCCESS;
289-
}
290-
291-
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
292-
if (isTimestampEvent) {
293-
return hostEventSetValueTimestamps(eventVal);
294-
}
295-
296-
auto hostAddr = static_cast<uint64_t *>(hostAddress);
297-
UNRECOVERABLE_IF(hostAddr == nullptr);
298-
memcpy_s(static_cast<void *>(hostAddr), sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t));
299-
300-
if (updateTaskCountEnabled) {
301-
this->csr->flushTagUpdate();
302-
updateTaskCountEnabled = false;
303-
}
304-
305-
NEO::CpuIntrinsics::clFlush(hostAddr);
306-
307-
return ZE_RESULT_SUCCESS;
308-
}
309-
310-
ze_result_t EventImp::hostSignal() {
311-
return hostEventSetValue(Event::STATE_SIGNALED);
312-
}
313-
314-
ze_result_t EventImp::hostSynchronize(uint64_t timeout) {
315-
std::chrono::high_resolution_clock::time_point time1, time2;
316-
uint64_t timeDiff = 0;
317-
318-
ze_result_t ret = ZE_RESULT_NOT_READY;
319-
320-
if (this->csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) {
321-
return ZE_RESULT_SUCCESS;
322-
}
323-
324-
if (timeout == 0) {
325-
return queryStatus();
326-
}
327-
328-
time1 = std::chrono::high_resolution_clock::now();
329-
while (true) {
330-
ret = queryStatus();
331-
if (ret == ZE_RESULT_SUCCESS) {
332-
return ret;
333-
}
334-
335-
NEO::WaitUtils::waitFunction(nullptr, 0u);
336-
337-
if (timeout == std::numeric_limits<uint32_t>::max()) {
338-
continue;
339-
}
340-
341-
time2 = std::chrono::high_resolution_clock::now();
342-
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(time2 - time1).count();
343-
344-
if (timeDiff >= timeout) {
345-
break;
346-
}
347-
}
348-
349-
return ret;
350-
}
351-
352-
ze_result_t EventImp::reset() {
353-
if (isTimestampEvent) {
354-
kernelCount = EventPacketsCount::maxKernelSplit;
355-
for (uint32_t i = 0; i < kernelCount; i++) {
356-
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
357-
}
358-
hostEventSetValue(Event::STATE_INITIAL);
359-
resetPackets();
360-
return ZE_RESULT_SUCCESS;
361-
} else {
362-
return hostEventSetValue(Event::STATE_INITIAL);
363-
}
364-
}
365-
366-
ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) {
367-
368-
ze_kernel_timestamp_result_t &result = *dstptr;
369-
370-
if (queryStatus() != ZE_RESULT_SUCCESS) {
371-
return ZE_RESULT_NOT_READY;
372-
}
373-
374-
assignTimestampData(hostAddress);
375-
calculateProfilingData();
376-
377-
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
378-
memcpy_s(&(timestampFieldForWriting), sizeof(uint64_t), static_cast<void *>(&timestampFieldToCopy), sizeof(uint64_t));
379-
};
380-
381-
if (!NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) {
382-
eventTsSetFunc(contextStartTS, result.context.kernelStart);
383-
eventTsSetFunc(globalStartTS, result.global.kernelStart);
384-
eventTsSetFunc(contextEndTS, result.context.kernelEnd);
385-
eventTsSetFunc(globalEndTS, result.global.kernelEnd);
386-
} else {
387-
eventTsSetFunc(globalStartTS, result.context.kernelStart);
388-
eventTsSetFunc(globalStartTS, result.global.kernelStart);
389-
eventTsSetFunc(globalEndTS, result.context.kernelEnd);
390-
eventTsSetFunc(globalEndTS, result.global.kernelEnd);
391-
}
392-
393-
return ZE_RESULT_SUCCESS;
394-
}
395-
396124
EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices,
397125
ze_device_handle_t *phDevices,
398126
const ze_event_pool_desc_t *desc) {

level_zero/core/source/event/event.h

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ struct Event : _ze_event_handle_t {
4242
STATE_INITIAL = STATE_CLEARED
4343
};
4444

45+
template <typename TagSizeT>
4546
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
4647

4748
static Event *fromHandle(ze_event_handle_t handle) { return static_cast<Event *>(handle); }
@@ -51,11 +52,11 @@ struct Event : _ze_event_handle_t {
5152
virtual NEO::GraphicsAllocation &getAllocation(Device *device) = 0;
5253

5354
virtual uint64_t getGpuAddress(Device *device) = 0;
54-
uint32_t getPacketsInUse();
55-
uint64_t getPacketAddress(Device *device);
56-
void resetPackets();
55+
virtual uint32_t getPacketsInUse() = 0;
56+
virtual uint64_t getPacketAddress(Device *device) = 0;
57+
virtual void resetPackets() = 0;
5758
void *getHostAddress() { return hostAddress; }
58-
void setPacketsInUse(uint32_t value);
59+
virtual void setPacketsInUse(uint32_t value) = 0;
5960
uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; }
6061
void *hostAddress = nullptr;
6162
uint32_t kernelCount = 1u;
@@ -64,7 +65,6 @@ struct Event : _ze_event_handle_t {
6465
bool isTimestampEvent = false;
6566
bool updateTaskCountEnabled = false;
6667

67-
std::unique_ptr<NEO::TimestampPackets<uint32_t>[]> kernelTimestampsData = nullptr;
6868
uint64_t globalStartTS;
6969
uint64_t globalEndTS;
7070
uint64_t contextStartTS;
@@ -79,6 +79,7 @@ struct Event : _ze_event_handle_t {
7979
NEO::GraphicsAllocation *allocation = nullptr;
8080
};
8181

82+
template <typename TagSizeT>
8283
struct EventImp : public Event {
8384
EventImp(EventPool *eventPool, int index, Device *device)
8485
: device(device), index(index), eventPool(eventPool) {}
@@ -99,6 +100,13 @@ struct EventImp : public Event {
99100

100101
uint64_t getGpuAddress(Device *device) override;
101102

103+
void resetPackets() override;
104+
uint64_t getPacketAddress(Device *device) override;
105+
uint32_t getPacketsInUse() override;
106+
void setPacketsInUse(uint32_t value) override;
107+
108+
std::unique_ptr<NEO::TimestampPackets<TagSizeT>[]> kernelTimestampsData;
109+
102110
Device *device;
103111
int index;
104112
EventPool *eventPool;
@@ -107,7 +115,7 @@ struct EventImp : public Event {
107115
ze_result_t calculateProfilingData();
108116
ze_result_t queryStatusKernelTimestamp();
109117
ze_result_t hostEventSetValue(uint32_t eventValue);
110-
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
118+
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
111119
void assignTimestampData(void *address);
112120
};
113121

0 commit comments

Comments
 (0)