Skip to content

Commit efd3cc9

Browse files
fix: correct calculation of submit CPU timestamp
Related-To: NEO-9590, HSD-18035219634
Signed-off-by: Mateusz Jablonski <[email protected]>
Source: aac8686
1 parent 032359d commit efd3cc9

File tree

6 files changed

+43
-24
lines changed

6 files changed

+43
-24
lines changed

opencl/source/command_queue/cpu_data_transfer_handler.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
9696
bool modifySimulationFlags = false;
9797

9898
if (outEventObj) {
99-
TimeStampData submitTimeStamp;
100-
getDevice().getOSTime()->getGpuCpuTime(&submitTimeStamp);
101-
outEventObj->setSubmitTimeStamp(submitTimeStamp);
99+
outEventObj->setSubmitTimeStamp();
102100
}
103101
// wait for the completion of previous commands
104102
if (transferProperties.finishRequired) {

opencl/source/command_queue/enqueue_common.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -420,9 +420,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
420420
completionStamp.taskLevel = taskLevel;
421421

422422
if (eventBuilder.getEvent() && isProfilingEnabled()) {
423-
TimeStampData submitTimeStamp;
424-
getDevice().getOSTime()->getGpuCpuTime(&submitTimeStamp);
425-
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
423+
eventBuilder.getEvent()->setSubmitTimeStamp();
426424
eventBuilder.getEvent()->setStartTimeStamp();
427425
}
428426

@@ -846,9 +844,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
846844
}
847845

848846
if (isProfilingEnabled() && eventBuilder.getEvent()) {
849-
TimeStampData submitTimeStamp;
850-
getDevice().getOSTime()->getGpuCpuTime(&submitTimeStamp);
851-
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
847+
eventBuilder.getEvent()->setSubmitTimeStamp();
852848

853849
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();
854850
if (hwTimestampNode) {
@@ -1113,9 +1109,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
11131109
}
11141110

11151111
if (eventBuilder.getEvent() && isProfilingEnabled()) {
1116-
TimeStampData submitTimeStamp;
1117-
getDevice().getOSTime()->getGpuCpuTime(&submitTimeStamp);
1118-
eventBuilder.getEvent()->setSubmitTimeStamp(submitTimeStamp);
1112+
eventBuilder.getEvent()->setSubmitTimeStamp();
11191113
eventBuilder.getEvent()->setStartTimeStamp();
11201114
}
11211115

opencl/source/event/event.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,15 +291,18 @@ void Event::setupRelativeProfilingInfo(ProfilingInfo &profilingInfo) {
291291
}
292292
}
293293

294-
void Event::setSubmitTimeStamp(const TimeStampData &submitTimeStamp) {
294+
void Event::setSubmitTimeStamp() {
295295
UNRECOVERABLE_IF(!cmdQueue);
296296
auto &device = cmdQueue->getDevice();
297297
auto &gfxCoreHelper = device.getGfxCoreHelper();
298298
double resolution = device.getDeviceInfo().profilingTimerResolution;
299299
UNRECOVERABLE_IF(resolution == 0.0);
300-
this->submitTimeStamp.cpuTimeInNs = submitTimeStamp.cpuTimeinNS;
301-
this->submitTimeStamp.gpuTimeInNs = gfxCoreHelper.getGpuTimeStampInNS(submitTimeStamp.gpuTimeStamp, resolution);
302-
this->submitTimeStamp.gpuTimeStamp = submitTimeStamp.gpuTimeStamp;
300+
301+
this->cmdQueue->getDevice().getOSTime()->getCpuTime(&this->submitTimeStamp.cpuTimeInNs);
302+
TimeStampData submitCpuGpuTime{};
303+
this->cmdQueue->getDevice().getOSTime()->getGpuCpuTime(&submitCpuGpuTime);
304+
this->submitTimeStamp.gpuTimeInNs = gfxCoreHelper.getGpuTimeStampInNS(submitCpuGpuTime.gpuTimeStamp, resolution);
305+
this->submitTimeStamp.gpuTimeStamp = submitCpuGpuTime.gpuTimeStamp;
303306

304307
setupRelativeProfilingInfo(queueTimeStamp);
305308
}
@@ -608,9 +611,7 @@ void Event::submitCommand(bool abortTasks) {
608611
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation());
609612
cmdToProcess->timestamp = timeStampNode;
610613
}
611-
TimeStampData submitTimeStamp{};
612-
this->cmdQueue->getDevice().getOSTime()->getGpuCpuTime(&submitTimeStamp);
613-
this->setSubmitTimeStamp(submitTimeStamp);
614+
this->setSubmitTimeStamp();
614615
if (profilingCpuPath) {
615616
setStartTimeStamp();
616617
} else {

opencl/source/event/event.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
294294
}
295295

296296
void setQueueTimeStamp();
297-
void setSubmitTimeStamp(const TimeStampData &submitTimeStamp);
297+
void setSubmitTimeStamp();
298298
void setStartTimeStamp();
299299
void setEndTimeStamp();
300300

opencl/test/unit_test/event/event_tests.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1357,7 +1357,14 @@ TEST_F(EventTest, whenSettingSubmitTimestampThenQueueAndSubmitTimestampsAreSet)
13571357
submitTimeStamp.cpuTimeinNS = cpuTimeStamp + expectedCpuDiff;
13581358
submitTimeStamp.gpuTimeStamp = expectedQueueGpuTimeStamp + expectedGpuDiff;
13591359

1360-
event.setSubmitTimeStamp(submitTimeStamp);
1360+
auto osTime = static_cast<MockOSTime *>(pDevice->getOSTime());
1361+
osTime->cpuTimeResult = submitTimeStamp.cpuTimeinNS;
1362+
1363+
auto deviceTime = static_cast<MockDeviceTime *>(osTime->deviceTime.get());
1364+
deviceTime->cpuTimeResult = submitTimeStamp.cpuTimeinNS;
1365+
deviceTime->gpuTimeStampResult = submitTimeStamp.gpuTimeStamp;
1366+
1367+
event.setSubmitTimeStamp();
13611368

13621369
EXPECT_EQ(expectedQueueGpuTimeStamp, event.queueTimeStamp.gpuTimeStamp);
13631370
EXPECT_EQ(cpuTimeStamp, event.queueTimeStamp.cpuTimeInNs);

shared/test/common/mocks/mock_ostime.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,19 @@ namespace NEO {
1414
static std::atomic<int> PerfTicks{0};
1515
constexpr uint64_t convertToNs = 100;
1616
class MockDeviceTime : public DeviceTime {
17+
public:
18+
~MockDeviceTime() override = default;
1719
bool getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override {
18-
pGpuCpuTime->gpuTimeStamp = ++PerfTicks;
19-
pGpuCpuTime->cpuTimeinNS = PerfTicks * convertToNs;
20+
if (gpuTimeStampResult) {
21+
pGpuCpuTime->gpuTimeStamp = *gpuTimeStampResult;
22+
} else {
23+
pGpuCpuTime->gpuTimeStamp = ++PerfTicks;
24+
}
25+
if (cpuTimeResult) {
26+
pGpuCpuTime->cpuTimeinNS = *cpuTimeResult;
27+
} else {
28+
pGpuCpuTime->cpuTimeinNS = PerfTicks * convertToNs;
29+
}
2030
return true;
2131
}
2232

@@ -27,16 +37,24 @@ class MockDeviceTime : public DeviceTime {
2737
uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override {
2838
return static_cast<uint64_t>(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo));
2939
}
40+
std::optional<uint64_t> gpuTimeStampResult{};
41+
std::optional<uint64_t> cpuTimeResult{};
3042
};
3143

3244
class MockOSTime : public OSTime {
3345
public:
46+
using OSTime::deviceTime;
3447
MockOSTime() {
3548
this->deviceTime = std::make_unique<MockDeviceTime>();
3649
}
50+
~MockOSTime() override = default;
3751

3852
bool getCpuTime(uint64_t *timeStamp) override {
39-
*timeStamp = ++PerfTicks * convertToNs;
53+
if (cpuTimeResult) {
54+
*timeStamp = *cpuTimeResult;
55+
} else {
56+
*timeStamp = ++PerfTicks * convertToNs;
57+
}
4058
return true;
4159
};
4260
double getHostTimerResolution() const override {
@@ -49,6 +67,7 @@ class MockOSTime : public OSTime {
4967
static std::unique_ptr<OSTime> create() {
5068
return std::unique_ptr<OSTime>(new MockOSTime());
5169
}
70+
std::optional<uint64_t> cpuTimeResult{};
5271
};
5372

5473
class MockDeviceTimeWithConstTimestamp : public DeviceTime {

0 commit comments

Comments
 (0)