@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
1515
1616 if (eventPool->isEventPoolTimestampFlagSet ()) {
1717 event->setEventTimestampFlag (true );
18- event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
1918 }
19+ event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
2020
2121 auto alloc = eventPool->getAllocation ().getGraphicsAllocation (device->getNEODevice ()->getRootDeviceIndex ());
2222
@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
4949
// Diff hunk for EventImp<TagSizeT>::calculateProfilingData(): every use of the
// kernelTimestampsData member is replaced by kernelEventCompletionData.
// Visible logic: the four timestamp members are seeded from kernel 0 / packet 0,
// then every in-use packet of every kernel below kernelCount is scanned, keeping
// the smallest global/context start value and the largest global/context end value.
// The lines between the outer loop and the closing brace fall outside this hunk,
// so the function's return statement is not shown here.
5050template <typename TagSizeT>
5151ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
52- globalStartTS = kernelTimestampsData [0 ].getGlobalStartValue (0 );
53- globalEndTS = kernelTimestampsData [0 ].getGlobalEndValue (0 );
54- contextStartTS = kernelTimestampsData [0 ].getContextStartValue (0 );
55- contextEndTS = kernelTimestampsData [0 ].getContextEndValue (0 );
52+ globalStartTS = kernelEventCompletionData [0 ].getGlobalStartValue (0 );
53+ globalEndTS = kernelEventCompletionData [0 ].getGlobalEndValue (0 );
54+ contextStartTS = kernelEventCompletionData [0 ].getContextStartValue (0 );
55+ contextEndTS = kernelEventCompletionData [0 ].getContextEndValue (0 );
5656
// Min/max reduction over all kernels and their used packets.
5757 for (uint32_t i = 0 ; i < kernelCount; i++) {
58- for (auto packetId = 0u ; packetId < kernelTimestampsData [i].getPacketsUsed (); packetId++) {
59- if (globalStartTS > kernelTimestampsData [i].getGlobalStartValue (packetId)) {
60- globalStartTS = kernelTimestampsData [i].getGlobalStartValue (packetId);
58+ for (auto packetId = 0u ; packetId < kernelEventCompletionData [i].getPacketsUsed (); packetId++) {
59+ if (globalStartTS > kernelEventCompletionData [i].getGlobalStartValue (packetId)) {
60+ globalStartTS = kernelEventCompletionData [i].getGlobalStartValue (packetId);
6161 }
62- if (contextStartTS > kernelTimestampsData [i].getContextStartValue (packetId)) {
63- contextStartTS = kernelTimestampsData [i].getContextStartValue (packetId);
62+ if (contextStartTS > kernelEventCompletionData [i].getContextStartValue (packetId)) {
63+ contextStartTS = kernelEventCompletionData [i].getContextStartValue (packetId);
6464 }
65- if (contextEndTS < kernelTimestampsData [i].getContextEndValue (packetId)) {
66- contextEndTS = kernelTimestampsData [i].getContextEndValue (packetId);
65+ if (contextEndTS < kernelEventCompletionData [i].getContextEndValue (packetId)) {
66+ contextEndTS = kernelEventCompletionData [i].getContextEndValue (packetId);
6767 }
68- if (globalEndTS < kernelTimestampsData [i].getGlobalEndValue (packetId)) {
69- globalEndTS = kernelTimestampsData [i].getGlobalEndValue (packetId);
68+ if (globalEndTS < kernelEventCompletionData [i].getGlobalEndValue (packetId)) {
69+ globalEndTS = kernelEventCompletionData [i].getGlobalEndValue (packetId);
7070 }
7171 }
7272 }
@@ -75,23 +75,40 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
7575}
7676
// Diff hunk renaming EventImp<TagSizeT>::assignTimestampData() to
// assignKernelEventCompletionData() and switching it to the
// kernelEventCompletionData member.
// For each kernel below kernelCount and each of its used packets, the function
// hands the current `address` to assignDataToAllTimestamps() and then advances
// `address` by one NEO::TimestampPackets<TagSizeT> packet size, so packets are
// consumed back-to-back starting at the address passed in.
7777template <typename TagSizeT>
78- void EventImp<TagSizeT>::assignTimestampData (void *address) {
78+ void EventImp<TagSizeT>::assignKernelEventCompletionData (void *address) {
7979 for (uint32_t i = 0 ; i < kernelCount; i++) {
80- uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed ();
// NOTE(review): the added version zero-initializes and then immediately
// overwrites packetsToCopy; the two added lines could be a single initialization.
80+ uint32_t packetsToCopy = 0 ;
81+ packetsToCopy = kernelEventCompletionData[i].getPacketsUsed ();
8182 for (uint32_t packetId = 0 ; packetId < packetsToCopy; packetId++) {
82- kernelTimestampsData [i].assignDataToAllTimestamps (packetId, address);
83+ kernelEventCompletionData [i].assignDataToAllTimestamps (packetId, address);
8384 address = ptrOffset (address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ());
8485 }
8586 }
8687}
8788
// Diff hunk for EventImp<TagSizeT>::queryStatusKernelTimestamp() (added version).
// It first refreshes the per-kernel completion data from hostAddress, then checks
// the context-end value of every used packet of every kernel below kernelCount.
// Returns ZE_RESULT_NOT_READY as soon as one of those values still equals
// Event::STATE_CLEARED, and ZE_RESULT_SUCCESS when none does.
8889template <typename TagSizeT>
8990ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
90- assignTimestampData (hostAddress);
91+ assignKernelEventCompletionData (hostAddress);
92+ uint32_t queryVal = Event::STATE_CLEARED;
93+ for (uint32_t i = 0 ; i < kernelCount; i++) {
94+ uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed ();
95+ for (uint32_t packetId = 0 ; packetId < packetsToCheck; packetId++) {
96+ if (kernelEventCompletionData[i].getContextEndValue (packetId) == queryVal) {
97+ return ZE_RESULT_NOT_READY;
98+ }
99+ }
100+ }
101+ return ZE_RESULT_SUCCESS;
102+ }
103+
104+ template <typename TagSizeT>
105+ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
106+ assignKernelEventCompletionData (hostAddress);
107+ uint32_t queryVal = Event::STATE_CLEARED;
91108 for (uint32_t i = 0 ; i < kernelCount; i++) {
92- uint32_t packetsToCheck = kernelTimestampsData [i].getPacketsUsed ();
109+ uint32_t packetsToCheck = kernelEventCompletionData [i].getPacketsUsed ();
93110 for (uint32_t packetId = 0 ; packetId < packetsToCheck; packetId++) {
94- if (kernelTimestampsData [i].getContextEndValue (packetId) == Event::STATE_CLEARED ) {
111+ if (kernelEventCompletionData [i].getContextStartValue (packetId) == queryVal ) {
95112 return ZE_RESULT_NOT_READY;
96113 }
97114 }
@@ -102,17 +119,16 @@ ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
// Diff hunk for EventImp<TagSizeT>::queryStatus().
// If a metricStreamer is attached, its notification state is written to the
// event's host address first. After csr->downloadAllocations(), the call is
// dispatched on isEventTimestampFlagSet(): queryStatusKernelTimestamp() when set,
// queryStatusNonTimestamp() otherwise.
// The removed lines show the previous non-timestamp path: a 32-bit copy from the
// host address compared against Event::STATE_CLEARED.
102119template <typename TagSizeT>
103120ze_result_t EventImp<TagSizeT>::queryStatus() {
104121 uint64_t *hostAddr = static_cast <uint64_t *>(hostAddress);
105- uint32_t queryVal = Event::STATE_CLEARED;
106122
107123 if (metricStreamer != nullptr ) {
108124 *hostAddr = metricStreamer->getNotificationState ();
109125 }
110126 this ->csr ->downloadAllocations ();
111127 if (isEventTimestampFlagSet ()) {
112128 return queryStatusKernelTimestamp ();
129+ } else {
130+ return queryStatusNonTimestamp ();
113131 }
114- memcpy_s (static_cast <void *>(&queryVal), sizeof (uint32_t ), static_cast <void *>(hostAddr), sizeof (uint32_t ));
115- return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
116132}
117133
118134template <typename TagSizeT>
@@ -130,7 +146,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
130146 }
131147 };
132148 for (uint32_t i = 0 ; i < kernelCount; i++) {
133- uint32_t packetsToSet = kernelTimestampsData [i].getPacketsUsed ();
149+ uint32_t packetsToSet = kernelEventCompletionData [i].getPacketsUsed ();
134150 for (uint32_t j = 0 ; j < packetsToSet; j++) {
135151 eventTsSetFunc (baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset ());
136152 eventTsSetFunc (baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset ());
@@ -139,7 +155,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
139155 baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
140156 }
141157 }
142- assignTimestampData (hostAddress);
158+ assignKernelEventCompletionData (hostAddress);
143159
144160 return ZE_RESULT_SUCCESS;
145161}
@@ -208,14 +224,12 @@ ze_result_t EventImp<TagSizeT>::reset() {
208224 if (isEventTimestampFlagSet ()) {
209225 kernelCount = EventPacketsCount::maxKernelSplit;
210226 for (uint32_t i = 0 ; i < kernelCount; i++) {
211- kernelTimestampsData [i].setPacketsUsed (NEO::TimestampPacketSizeControl::preferredPacketCount);
227+ kernelEventCompletionData [i].setPacketsUsed (NEO::TimestampPacketSizeControl::preferredPacketCount);
212228 }
213- hostEventSetValue (Event::STATE_INITIAL);
214- resetPackets ();
215- return ZE_RESULT_SUCCESS;
216- } else {
217- return hostEventSetValue (Event::STATE_INITIAL);
218229 }
230+ hostEventSetValue (Event::STATE_INITIAL);
231+ resetPackets ();
232+ return ZE_RESULT_SUCCESS;
219233}
220234
221235template <typename TagSizeT>
@@ -227,7 +241,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
227241 return ZE_RESULT_NOT_READY;
228242 }
229243
230- assignTimestampData (hostAddress);
244+ assignKernelEventCompletionData (hostAddress);
231245 calculateProfilingData ();
232246
233247 auto eventTsSetFunc = [&](uint64_t ×tampFieldToCopy, uint64_t ×tampFieldForWriting) {
@@ -266,7 +280,7 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
266280 }
267281
268282 if ((*pCount == 0 ) ||
269- (*pCount > kernelTimestampsData [timestampPacket].getPacketsUsed ())) {
283+ (*pCount > kernelEventCompletionData [timestampPacket].getPacketsUsed ())) {
270284 *pCount = this ->getPacketsInUse ();
271285 return ZE_RESULT_SUCCESS;
272286 }
@@ -278,10 +292,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
278292 memcpy_s (×tampFieldForWriting, sizeof (uint64_t ), static_cast <void *>(×tampFieldToCopy), sizeof (uint64_t ));
279293 };
280294
281- globalStartTs = kernelTimestampsData [timestampPacket].getGlobalStartValue (packetId);
282- contextStartTs = kernelTimestampsData [timestampPacket].getContextStartValue (packetId);
283- contextEndTs = kernelTimestampsData [timestampPacket].getContextEndValue (packetId);
284- globalEndTs = kernelTimestampsData [timestampPacket].getGlobalEndValue (packetId);
295+ globalStartTs = kernelEventCompletionData [timestampPacket].getGlobalStartValue (packetId);
296+ contextStartTs = kernelEventCompletionData [timestampPacket].getContextStartValue (packetId);
297+ contextEndTs = kernelEventCompletionData [timestampPacket].getContextEndValue (packetId);
298+ globalEndTs = kernelEventCompletionData [timestampPacket].getGlobalEndValue (packetId);
285299
286300 queryTsEventAssignFunc (result.global .kernelStart , globalStartTs);
287301 queryTsEventAssignFunc (result.context .kernelStart , contextStartTs);
@@ -295,37 +309,31 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
// Diff hunk for EventImp<TagSizeT>::resetPackets(): the member it writes to is
// renamed from kernelTimestampsData to kernelEventCompletionData.
// Sets the used-packet count of every kernel entry below the current kernelCount
// to 1, then sets kernelCount itself to 1. The order matters: the loop bound is
// the kernelCount value from before the reset.
295309template <typename TagSizeT>
296310void EventImp<TagSizeT>::resetPackets() {
297311 for (uint32_t i = 0 ; i < kernelCount; i++) {
298- kernelTimestampsData [i].setPacketsUsed (1 );
312+ kernelEventCompletionData [i].setPacketsUsed (1 );
299313 }
300314 kernelCount = 1 ;
301315}
302316
// Diff hunk for EventImp<TagSizeT>::getPacketsInUse().
// Added version: returns the sum of getPacketsUsed() over all kernel entries
// below kernelCount, regardless of the event's timestamp flag.
// Removed version: did the same sum only when isEventTimestampFlagSet() was true
// and returned a constant 1 otherwise.
303317template <typename TagSizeT>
304318uint32_t EventImp<TagSizeT>::getPacketsInUse() {
305- if (isEventTimestampFlagSet ()) {
306- uint32_t packetsInUse = 0 ;
307- for (uint32_t i = 0 ; i < kernelCount; i++) {
308- packetsInUse += kernelTimestampsData[i].getPacketsUsed ();
309- };
310- return packetsInUse;
311- } else {
312- return 1 ;
319+ uint32_t packetsInUse = 0 ;
320+ for (uint32_t i = 0 ; i < kernelCount; i++) {
321+ packetsInUse += kernelEventCompletionData[i].getPacketsUsed ();
313322 }
323+ return packetsInUse;
314324}
315325
// Diff hunk for EventImp<TagSizeT>::setPacketsInUse(): stores `value` as the
// used-packet count of the entry selected by getCurrKernelDataIndex().
// The change renames the member to kernelEventCompletionData and drops the stray
// semicolon that followed the function's closing brace.
316326template <typename TagSizeT>
317327void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
318- kernelTimestampsData [getCurrKernelDataIndex ()].setPacketsUsed (value);
319- };
328+ kernelEventCompletionData [getCurrKernelDataIndex ()].setPacketsUsed (value);
329+ }
320330
// Diff hunk for EventImp<TagSizeT>::getPacketAddress().
// Starts from getGpuAddress(device) and adds, for every kernel entry except the
// last one (indices 0 .. kernelCount - 2), that entry's used-packet count times
// the single-packet size of NEO::TimestampPackets<TagSizeT>. The result is
// returned as the packet address.
// Removed version: performed the offset loop only when the timestamp flag was set
// and kernelCount > 1. Added version: runs the loop unconditionally.
// NOTE(review): with the `kernelCount > 1` guard gone, `kernelCount - 1` would
// wrap around if kernelCount is an unsigned type and ever 0 here - presumably
// kernelCount is always >= 1; confirm against the member's declaration and callers.
321331template <typename TagSizeT>
322332uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
323333 uint64_t address = getGpuAddress (device);
324- if (isEventTimestampFlagSet () && kernelCount > 1 ) {
325- for (uint32_t i = 0 ; i < kernelCount - 1 ; i++) {
326- address += kernelTimestampsData[i].getPacketsUsed () *
327- NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
328- }
334+ for (uint32_t i = 0 ; i < kernelCount - 1 ; i++) {
335+ address += kernelEventCompletionData[i].getPacketsUsed () *
336+ NEO::TimestampPackets<TagSizeT>::getSinglePacketSize ();
329337 }
330338 return address;
331339}
0 commit comments