Skip to content

Commit c051495

Browse files
Software workaround (SW WA): add a PIPE_CONTROL with dcFlush enabled when the event scope is host/device
Related-To: LOCI-2361
Signed-off-by: Vinod Tipparaju <[email protected]>
Signed-off-by: Aravind Gopalakrishnan <[email protected]>
1 parent f2eb7f3 commit c051495

File tree

4 files changed, with 94 additions (+94) and 70 deletions (-70).

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,9 +230,24 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
230230
isCooperative);
231231
if (hEvent) {
232232
auto event = Event::fromHandle(hEvent);
233-
if (isTimestampEvent && partitionCount > 1) {
233+
if (partitionCount > 1) {
234234
event->setPacketsInUse(partitionCount);
235235
}
236+
if (L3FlushEnable) {
237+
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
238+
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
239+
auto &hwHelper = this->device->getHwHelper();
240+
eventAddress = event->getPacketAddress(this->device) + hwHelper.getSingleTimestampPacketSize();
241+
event->setPacketsInUse(event->getPacketsInUse() + 1);
242+
243+
NEO::PipeControlArgs args;
244+
args.dcFlushEnable = L3FlushEnable;
245+
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
246+
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
247+
eventAddress, Event::STATE_SIGNALED,
248+
commandContainer.getDevice()->getHardwareInfo(),
249+
args);
250+
}
236251
}
237252

238253
if (neoDevice->getDebugger()) {

level_zero/core/source/event/event.h

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ struct Event : _ze_event_handle_t {
9595
};
9696

9797
template <typename TagSizeT>
98-
class KernelTimestampsData : public NEO::TimestampPackets<TagSizeT> {
98+
class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT> {
9999
public:
100100
uint32_t getPacketsUsed() const { return packetsUsed; }
101101
void setPacketsUsed(uint32_t value) { packetsUsed = value; }
@@ -139,7 +139,7 @@ struct EventImp : public Event {
139139
size_t getSinglePacketSize() const override { return NEO::TimestampPackets<TagSizeT>::getSinglePacketSize(); };
140140
ze_result_t hostEventSetValue(uint32_t eventValue) override;
141141

142-
std::unique_ptr<KernelTimestampsData<TagSizeT>[]> kernelTimestampsData;
142+
std::unique_ptr<KernelEventCompletionData<TagSizeT>[]> kernelEventCompletionData;
143143

144144
Device *device;
145145
int index;
@@ -148,8 +148,9 @@ struct EventImp : public Event {
148148
protected:
149149
ze_result_t calculateProfilingData();
150150
ze_result_t queryStatusKernelTimestamp();
151+
ze_result_t queryStatusNonTimestamp();
151152
ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal);
152-
void assignTimestampData(void *address);
153+
void assignKernelEventCompletionData(void *address);
153154
};
154155

155156
struct EventPool : _ze_event_pool_handle_t {

level_zero/core/source/event/event_impl.inl

Lines changed: 61 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
1515

1616
if (eventPool->isEventPoolTimestampFlagSet()) {
1717
event->setEventTimestampFlag(true);
18-
event->kernelTimestampsData = std::make_unique<KernelTimestampsData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
1918
}
19+
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
2020

2121
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
2222

@@ -49,24 +49,24 @@ NEO::GraphicsAllocation &EventImp<TagSizeT>::getAllocation(Device *device) {
4949

5050
template <typename TagSizeT>
5151
ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
52-
globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0);
53-
globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0);
54-
contextStartTS = kernelTimestampsData[0].getContextStartValue(0);
55-
contextEndTS = kernelTimestampsData[0].getContextEndValue(0);
52+
globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0);
53+
globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0);
54+
contextStartTS = kernelEventCompletionData[0].getContextStartValue(0);
55+
contextEndTS = kernelEventCompletionData[0].getContextEndValue(0);
5656

5757
for (uint32_t i = 0; i < kernelCount; i++) {
58-
for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) {
59-
if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) {
60-
globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId);
58+
for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) {
59+
if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) {
60+
globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId);
6161
}
62-
if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) {
63-
contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId);
62+
if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) {
63+
contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId);
6464
}
65-
if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) {
66-
contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId);
65+
if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) {
66+
contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId);
6767
}
68-
if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) {
69-
globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId);
68+
if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) {
69+
globalEndTS = kernelEventCompletionData[i].getGlobalEndValue(packetId);
7070
}
7171
}
7272
}
@@ -75,23 +75,40 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
7575
}
7676

7777
template <typename TagSizeT>
78-
void EventImp<TagSizeT>::assignTimestampData(void *address) {
78+
void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
7979
for (uint32_t i = 0; i < kernelCount; i++) {
80-
uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed();
80+
uint32_t packetsToCopy = 0;
81+
packetsToCopy = kernelEventCompletionData[i].getPacketsUsed();
8182
for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) {
82-
kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address);
83+
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
8384
address = ptrOffset(address, NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
8485
}
8586
}
8687
}
8788

8889
template <typename TagSizeT>
8990
ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
90-
assignTimestampData(hostAddress);
91+
assignKernelEventCompletionData(hostAddress);
92+
uint32_t queryVal = Event::STATE_CLEARED;
93+
for (uint32_t i = 0; i < kernelCount; i++) {
94+
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
95+
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
96+
if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) {
97+
return ZE_RESULT_NOT_READY;
98+
}
99+
}
100+
}
101+
return ZE_RESULT_SUCCESS;
102+
}
103+
104+
template <typename TagSizeT>
105+
ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
106+
assignKernelEventCompletionData(hostAddress);
107+
uint32_t queryVal = Event::STATE_CLEARED;
91108
for (uint32_t i = 0; i < kernelCount; i++) {
92-
uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed();
109+
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
93110
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
94-
if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) {
111+
if (kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) {
95112
return ZE_RESULT_NOT_READY;
96113
}
97114
}
@@ -102,17 +119,16 @@ ze_result_t EventImp<TagSizeT>::queryStatusKernelTimestamp() {
102119
template <typename TagSizeT>
103120
ze_result_t EventImp<TagSizeT>::queryStatus() {
104121
uint64_t *hostAddr = static_cast<uint64_t *>(hostAddress);
105-
uint32_t queryVal = Event::STATE_CLEARED;
106122

107123
if (metricStreamer != nullptr) {
108124
*hostAddr = metricStreamer->getNotificationState();
109125
}
110126
this->csr->downloadAllocations();
111127
if (isEventTimestampFlagSet()) {
112128
return queryStatusKernelTimestamp();
129+
} else {
130+
return queryStatusNonTimestamp();
113131
}
114-
memcpy_s(static_cast<void *>(&queryVal), sizeof(uint32_t), static_cast<void *>(hostAddr), sizeof(uint32_t));
115-
return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS;
116132
}
117133

118134
template <typename TagSizeT>
@@ -130,7 +146,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
130146
}
131147
};
132148
for (uint32_t i = 0; i < kernelCount; i++) {
133-
uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed();
149+
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
134150
for (uint32_t j = 0; j < packetsToSet; j++) {
135151
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getContextStartOffset());
136152
eventTsSetFunc(baseAddr + NEO::TimestampPackets<TagSizeT>::getGlobalStartOffset());
@@ -139,7 +155,7 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
139155
baseAddr += NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
140156
}
141157
}
142-
assignTimestampData(hostAddress);
158+
assignKernelEventCompletionData(hostAddress);
143159

144160
return ZE_RESULT_SUCCESS;
145161
}
@@ -208,14 +224,12 @@ ze_result_t EventImp<TagSizeT>::reset() {
208224
if (isEventTimestampFlagSet()) {
209225
kernelCount = EventPacketsCount::maxKernelSplit;
210226
for (uint32_t i = 0; i < kernelCount; i++) {
211-
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
227+
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
212228
}
213-
hostEventSetValue(Event::STATE_INITIAL);
214-
resetPackets();
215-
return ZE_RESULT_SUCCESS;
216-
} else {
217-
return hostEventSetValue(Event::STATE_INITIAL);
218229
}
230+
hostEventSetValue(Event::STATE_INITIAL);
231+
resetPackets();
232+
return ZE_RESULT_SUCCESS;
219233
}
220234

221235
template <typename TagSizeT>
@@ -227,7 +241,7 @@ ze_result_t EventImp<TagSizeT>::queryKernelTimestamp(ze_kernel_timestamp_result_
227241
return ZE_RESULT_NOT_READY;
228242
}
229243

230-
assignTimestampData(hostAddress);
244+
assignKernelEventCompletionData(hostAddress);
231245
calculateProfilingData();
232246

233247
auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) {
@@ -266,7 +280,7 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
266280
}
267281

268282
if ((*pCount == 0) ||
269-
(*pCount > kernelTimestampsData[timestampPacket].getPacketsUsed())) {
283+
(*pCount > kernelEventCompletionData[timestampPacket].getPacketsUsed())) {
270284
*pCount = this->getPacketsInUse();
271285
return ZE_RESULT_SUCCESS;
272286
}
@@ -278,10 +292,10 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
278292
memcpy_s(&timestampFieldForWriting, sizeof(uint64_t), static_cast<void *>(&timestampFieldToCopy), sizeof(uint64_t));
279293
};
280294

281-
globalStartTs = kernelTimestampsData[timestampPacket].getGlobalStartValue(packetId);
282-
contextStartTs = kernelTimestampsData[timestampPacket].getContextStartValue(packetId);
283-
contextEndTs = kernelTimestampsData[timestampPacket].getContextEndValue(packetId);
284-
globalEndTs = kernelTimestampsData[timestampPacket].getGlobalEndValue(packetId);
295+
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);
296+
contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId);
297+
contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId);
298+
globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId);
285299

286300
queryTsEventAssignFunc(result.global.kernelStart, globalStartTs);
287301
queryTsEventAssignFunc(result.context.kernelStart, contextStartTs);
@@ -295,37 +309,31 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
295309
template <typename TagSizeT>
296310
void EventImp<TagSizeT>::resetPackets() {
297311
for (uint32_t i = 0; i < kernelCount; i++) {
298-
kernelTimestampsData[i].setPacketsUsed(1);
312+
kernelEventCompletionData[i].setPacketsUsed(1);
299313
}
300314
kernelCount = 1;
301315
}
302316

303317
template <typename TagSizeT>
304318
uint32_t EventImp<TagSizeT>::getPacketsInUse() {
305-
if (isEventTimestampFlagSet()) {
306-
uint32_t packetsInUse = 0;
307-
for (uint32_t i = 0; i < kernelCount; i++) {
308-
packetsInUse += kernelTimestampsData[i].getPacketsUsed();
309-
};
310-
return packetsInUse;
311-
} else {
312-
return 1;
319+
uint32_t packetsInUse = 0;
320+
for (uint32_t i = 0; i < kernelCount; i++) {
321+
packetsInUse += kernelEventCompletionData[i].getPacketsUsed();
313322
}
323+
return packetsInUse;
314324
}
315325

316326
template <typename TagSizeT>
317327
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
318-
kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value);
319-
};
328+
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
329+
}
320330

321331
template <typename TagSizeT>
322332
uint64_t EventImp<TagSizeT>::getPacketAddress(Device *device) {
323333
uint64_t address = getGpuAddress(device);
324-
if (isEventTimestampFlagSet() && kernelCount > 1) {
325-
for (uint32_t i = 0; i < kernelCount - 1; i++) {
326-
address += kernelTimestampsData[i].getPacketsUsed() *
327-
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
328-
}
334+
for (uint32_t i = 0; i < kernelCount - 1; i++) {
335+
address += kernelEventCompletionData[i].getPacketsUsed() *
336+
NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
329337
}
330338
return address;
331339
}

level_zero/core/test/unit_tests/sources/event/test_event.cpp

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -640,15 +640,15 @@ TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventF
640640
}
641641

642642
TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) {
643-
EXPECT_NE(nullptr, event->kernelTimestampsData);
643+
EXPECT_NE(nullptr, event->kernelEventCompletionData);
644644
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
645645
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
646-
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextStartValue(i));
647-
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalStartValue(i));
648-
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextEndValue(i));
649-
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalEndValue(i));
646+
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i));
647+
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i));
648+
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextEndValue(i));
649+
EXPECT_EQ(static_cast<uint64_t>(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i));
650650
}
651-
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
651+
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
652652
}
653653

654654
EXPECT_EQ(1u, event->kernelCount);
@@ -692,7 +692,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO
692692
}
693693

694694
TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) {
695-
EXPECT_NE(nullptr, event->kernelTimestampsData);
695+
EXPECT_NE(nullptr, event->kernelEventCompletionData);
696696
event->hostSignal();
697697
ze_result_t result = event->queryStatus();
698698
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -702,12 +702,12 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec
702702
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
703703
for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) {
704704
for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) {
705-
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextStartValue(i));
706-
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalStartValue(i));
707-
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextEndValue(i));
708-
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalEndValue(i));
705+
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i));
706+
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i));
707+
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i));
708+
EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i));
709709
}
710-
EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed());
710+
EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed());
711711
}
712712
EXPECT_EQ(1u, event->kernelCount);
713713
}
@@ -828,7 +828,7 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe
828828
uint32_t pCount = 2;
829829

830830
for (uint32_t packetId = 0; packetId < pCount; packetId++) {
831-
event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
831+
event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress);
832832
event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets<uint32_t>::getSinglePacketSize());
833833
}
834834

0 commit comments

Comments
 (0)