|
24 | 24 | #include "level_zero/core/source/device/device.h" |
25 | 25 | #include "level_zero/core/source/device/device_imp.h" |
26 | 26 | #include "level_zero/core/source/driver/driver_handle_imp.h" |
| 27 | +#include "level_zero/core/source/event/event_impl.inl" |
27 | 28 | #include "level_zero/tools/source/metrics/metric.h" |
28 | 29 |
|
29 | 30 | #include <set> |
@@ -110,289 +111,16 @@ ze_result_t EventPoolImp::createEvent(const ze_event_desc_t *desc, ze_event_hand |
110 | 111 | if (desc->index > (getNumEvents() - 1)) { |
111 | 112 | return ZE_RESULT_ERROR_INVALID_ARGUMENT; |
112 | 113 | } |
113 | | - *phEvent = Event::create(this, desc, this->getDevice()); |
| 114 | + *phEvent = Event::create<uint32_t>(this, desc, this->getDevice()); |
114 | 115 |
|
115 | 116 | return ZE_RESULT_SUCCESS; |
116 | 117 | } |
117 | 118 |
|
118 | | -uint64_t EventImp::getGpuAddress(Device *device) { |
119 | | - auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); |
120 | | - return (alloc->getGpuAddress() + (index * eventPool->getEventSize())); |
121 | | -} |
122 | | - |
123 | | -Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { |
124 | | - auto event = new EventImp(eventPool, desc->index, device); |
125 | | - UNRECOVERABLE_IF(event == nullptr); |
126 | | - |
127 | | - if (eventPool->isEventPoolUsedForTimestamp) { |
128 | | - event->isTimestampEvent = true; |
129 | | - event->kernelTimestampsData = std::make_unique<NEO::TimestampPackets<uint32_t>[]>(EventPacketsCount::maxKernelSplit); |
130 | | - } |
131 | | - |
132 | | - auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); |
133 | | - |
134 | | - uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer()); |
135 | | - event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize())); |
136 | | - event->signalScope = desc->signal; |
137 | | - event->waitScope = desc->wait; |
138 | | - event->csr = static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver; |
139 | | - event->reset(); |
140 | | - |
141 | | - return event; |
142 | | -} |
143 | | - |
144 | | -NEO::GraphicsAllocation &EventImp::getAllocation(Device *device) { |
145 | | - return *this->eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); |
146 | | -} |
147 | | - |
148 | | -void Event::resetPackets() { |
149 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
150 | | - kernelTimestampsData[i].setPacketsUsed(1); |
151 | | - } |
152 | | - kernelCount = 1; |
153 | | -} |
154 | | - |
155 | | -uint32_t Event::getPacketsInUse() { |
156 | | - if (isTimestampEvent) { |
157 | | - uint32_t packetsInUse = 0; |
158 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
159 | | - packetsInUse += kernelTimestampsData[i].getPacketsUsed(); |
160 | | - }; |
161 | | - return packetsInUse; |
162 | | - } else { |
163 | | - return 1; |
164 | | - } |
165 | | -} |
166 | | - |
167 | | -void Event::setPacketsInUse(uint32_t value) { |
168 | | - kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value); |
169 | | -}; |
170 | | - |
171 | | -uint64_t Event::getPacketAddress(Device *device) { |
172 | | - uint64_t address = getGpuAddress(device); |
173 | | - if (isTimestampEvent && kernelCount > 1) { |
174 | | - for (uint32_t i = 0; i < kernelCount - 1; i++) { |
175 | | - address += kernelTimestampsData[i].getPacketsUsed() * |
176 | | - NEO::TimestampPackets<uint32_t>::getSinglePacketSize(); |
177 | | - } |
178 | | - } |
179 | | - return address; |
180 | | -} |
181 | | - |
182 | | -ze_result_t EventImp::calculateProfilingData() { |
183 | | - globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0); |
184 | | - globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0); |
185 | | - contextStartTS = kernelTimestampsData[0].getContextStartValue(0); |
186 | | - contextEndTS = kernelTimestampsData[0].getContextEndValue(0); |
187 | | - |
188 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
189 | | - for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) { |
190 | | - if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) { |
191 | | - globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId); |
192 | | - } |
193 | | - if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) { |
194 | | - contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId); |
195 | | - } |
196 | | - if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) { |
197 | | - contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId); |
198 | | - } |
199 | | - if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) { |
200 | | - globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId); |
201 | | - } |
202 | | - } |
203 | | - } |
204 | | - |
205 | | - return ZE_RESULT_SUCCESS; |
206 | | -} |
207 | | - |
208 | | -void EventImp::assignTimestampData(void *address) { |
209 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
210 | | - uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed(); |
211 | | - for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) { |
212 | | - kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address); |
213 | | - address = ptrOffset(address, NEO::TimestampPackets<uint32_t>::getSinglePacketSize()); |
214 | | - } |
215 | | - } |
216 | | -} |
217 | | - |
218 | 119 | ze_result_t Event::destroy() { |
219 | 120 | delete this; |
220 | 121 | return ZE_RESULT_SUCCESS; |
221 | 122 | } |
222 | 123 |
|
223 | | -ze_result_t EventImp::queryStatusKernelTimestamp() { |
224 | | - assignTimestampData(hostAddress); |
225 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
226 | | - uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed(); |
227 | | - for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { |
228 | | - if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) { |
229 | | - return ZE_RESULT_NOT_READY; |
230 | | - } |
231 | | - } |
232 | | - } |
233 | | - return ZE_RESULT_SUCCESS; |
234 | | -} |
235 | | - |
236 | | -ze_result_t EventImp::queryStatus() { |
237 | | - uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress); |
238 | | - uint32_t queryVal = Event::STATE_CLEARED; |
239 | | - ze_result_t retVal; |
240 | | - |
241 | | - if (metricStreamer != nullptr) { |
242 | | - *hostAddr = metricStreamer->getNotificationState(); |
243 | | - } |
244 | | - this->csr->downloadAllocations(); |
245 | | - if (isTimestampEvent) { |
246 | | - return queryStatusKernelTimestamp(); |
247 | | - } |
248 | | - memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t)); |
249 | | - retVal = (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS; |
250 | | - |
251 | | - if (retVal == ZE_RESULT_NOT_READY) { |
252 | | - return retVal; |
253 | | - } |
254 | | - |
255 | | - if (updateTaskCountEnabled) { |
256 | | - this->csr->flushTagUpdate(); |
257 | | - updateTaskCountEnabled = false; |
258 | | - } |
259 | | - |
260 | | - return retVal; |
261 | | -} |
262 | | - |
263 | | -ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { |
264 | | - |
265 | | - auto baseAddr = reinterpret_cast<uint64_t>(hostAddress); |
266 | | - auto signalScopeFlag = this->signalScope; |
267 | | - |
268 | | - auto eventTsSetFunc = [&eventVal, &signalScopeFlag](auto tsAddr) { |
269 | | - auto tsptr = reinterpret_cast<void *>(tsAddr); |
270 | | - |
271 | | - memcpy_s(tsptr, sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t)); |
272 | | - if (!signalScopeFlag) { |
273 | | - NEO::CpuIntrinsics::clFlush(tsptr); |
274 | | - } |
275 | | - }; |
276 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
277 | | - uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed(); |
278 | | - for (uint32_t i = 0; i < packetsToSet; i++) { |
279 | | - eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextStartOffset()); |
280 | | - eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalStartOffset()); |
281 | | - eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getContextEndOffset()); |
282 | | - eventTsSetFunc(baseAddr + NEO::TimestampPackets<uint32_t>::getGlobalEndOffset()); |
283 | | - baseAddr += NEO::TimestampPackets<uint32_t>::getSinglePacketSize(); |
284 | | - } |
285 | | - } |
286 | | - assignTimestampData(hostAddress); |
287 | | - |
288 | | - return ZE_RESULT_SUCCESS; |
289 | | -} |
290 | | - |
291 | | -ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) { |
292 | | - if (isTimestampEvent) { |
293 | | - return hostEventSetValueTimestamps(eventVal); |
294 | | - } |
295 | | - |
296 | | - auto hostAddr = static_cast<uint64_t *>(hostAddress); |
297 | | - UNRECOVERABLE_IF(hostAddr == nullptr); |
298 | | - memcpy_s(static_cast<void *>(hostAddr), sizeof(uint32_t), static_cast<void *>(&eventVal), sizeof(uint32_t)); |
299 | | - |
300 | | - if (updateTaskCountEnabled) { |
301 | | - this->csr->flushTagUpdate(); |
302 | | - updateTaskCountEnabled = false; |
303 | | - } |
304 | | - |
305 | | - NEO::CpuIntrinsics::clFlush(hostAddr); |
306 | | - |
307 | | - return ZE_RESULT_SUCCESS; |
308 | | -} |
309 | | - |
310 | | -ze_result_t EventImp::hostSignal() { |
311 | | - return hostEventSetValue(Event::STATE_SIGNALED); |
312 | | -} |
313 | | - |
314 | | -ze_result_t EventImp::hostSynchronize(uint64_t timeout) { |
315 | | - std::chrono::high_resolution_clock::time_point time1, time2; |
316 | | - uint64_t timeDiff = 0; |
317 | | - |
318 | | - ze_result_t ret = ZE_RESULT_NOT_READY; |
319 | | - |
320 | | - if (this->csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { |
321 | | - return ZE_RESULT_SUCCESS; |
322 | | - } |
323 | | - |
324 | | - if (timeout == 0) { |
325 | | - return queryStatus(); |
326 | | - } |
327 | | - |
328 | | - time1 = std::chrono::high_resolution_clock::now(); |
329 | | - while (true) { |
330 | | - ret = queryStatus(); |
331 | | - if (ret == ZE_RESULT_SUCCESS) { |
332 | | - return ret; |
333 | | - } |
334 | | - |
335 | | - NEO::WaitUtils::waitFunction(nullptr, 0u); |
336 | | - |
337 | | - if (timeout == std::numeric_limits<uint32_t>::max()) { |
338 | | - continue; |
339 | | - } |
340 | | - |
341 | | - time2 = std::chrono::high_resolution_clock::now(); |
342 | | - timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(time2 - time1).count(); |
343 | | - |
344 | | - if (timeDiff >= timeout) { |
345 | | - break; |
346 | | - } |
347 | | - } |
348 | | - |
349 | | - return ret; |
350 | | -} |
351 | | - |
352 | | -ze_result_t EventImp::reset() { |
353 | | - if (isTimestampEvent) { |
354 | | - kernelCount = EventPacketsCount::maxKernelSplit; |
355 | | - for (uint32_t i = 0; i < kernelCount; i++) { |
356 | | - kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); |
357 | | - } |
358 | | - hostEventSetValue(Event::STATE_INITIAL); |
359 | | - resetPackets(); |
360 | | - return ZE_RESULT_SUCCESS; |
361 | | - } else { |
362 | | - return hostEventSetValue(Event::STATE_INITIAL); |
363 | | - } |
364 | | -} |
365 | | - |
366 | | -ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) { |
367 | | - |
368 | | - ze_kernel_timestamp_result_t &result = *dstptr; |
369 | | - |
370 | | - if (queryStatus() != ZE_RESULT_SUCCESS) { |
371 | | - return ZE_RESULT_NOT_READY; |
372 | | - } |
373 | | - |
374 | | - assignTimestampData(hostAddress); |
375 | | - calculateProfilingData(); |
376 | | - |
377 | | - auto eventTsSetFunc = [&](uint64_t ×tampFieldToCopy, uint64_t ×tampFieldForWriting) { |
378 | | - memcpy_s(&(timestampFieldForWriting), sizeof(uint64_t), static_cast<void *>(×tampFieldToCopy), sizeof(uint64_t)); |
379 | | - }; |
380 | | - |
381 | | - if (!NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { |
382 | | - eventTsSetFunc(contextStartTS, result.context.kernelStart); |
383 | | - eventTsSetFunc(globalStartTS, result.global.kernelStart); |
384 | | - eventTsSetFunc(contextEndTS, result.context.kernelEnd); |
385 | | - eventTsSetFunc(globalEndTS, result.global.kernelEnd); |
386 | | - } else { |
387 | | - eventTsSetFunc(globalStartTS, result.context.kernelStart); |
388 | | - eventTsSetFunc(globalStartTS, result.global.kernelStart); |
389 | | - eventTsSetFunc(globalEndTS, result.context.kernelEnd); |
390 | | - eventTsSetFunc(globalEndTS, result.global.kernelEnd); |
391 | | - } |
392 | | - |
393 | | - return ZE_RESULT_SUCCESS; |
394 | | -} |
395 | | - |
396 | 124 | EventPool *EventPool::create(DriverHandle *driver, Context *context, uint32_t numDevices, |
397 | 125 | ze_device_handle_t *phDevices, |
398 | 126 | const ze_event_pool_desc_t *desc) { |
|
0 commit comments