Skip to content

Commit b039f5c

Browse files
Optimize profiling calls.
- Do not call the KMD query for the submitted timestamp; only the CPU timestamp is needed here.
Change-Id: Id60c1e367d9430d893fb3a253ffc058f45fc9609
Signed-off-by: Michal Mrozek <[email protected]>
1 parent 39a930e commit b039f5c

File tree

3 files changed

+32
-4
lines changed

3 files changed

+32
-4
lines changed

opencl/source/command_queue/enqueue_common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -698,9 +698,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
698698
DEBUG_BREAK_IF(device->getDeviceInfo().preemptionSupported != false);
699699
}
700700

701-
TimeStampData submitTimeStamp;
701+
TimeStampData submitTimeStamp = {};
702702
if (isProfilingEnabled() && eventBuilder.getEvent()) {
703-
this->getDevice().getOSTime()->getCpuGpuTime(&submitTimeStamp);
703+
this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS);
704704
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
705705
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation());
706706
if (isPerfCountersEnabled()) {

opencl/test/unit_test/mocks/mock_ostime.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,15 @@
99
#include "shared/source/os_interface/os_time.h"
1010

1111
namespace NEO {
12+
static int PerfTicks = 0;
1213
class MockOSTime : public OSTime {
1314
public:
1415
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
15-
static int PerfTicks = 0;
1616
pGpuCpuTime->GPUTimeStamp = ++PerfTicks;
1717
pGpuCpuTime->CPUTimeinNS = PerfTicks;
1818
return true;
1919
}
2020
bool getCpuTime(uint64_t *timeStamp) override {
21-
static int PerfTicks = 0;
2221
*timeStamp = ++PerfTicks;
2322
return true;
2423
};

opencl/test/unit_test/profiling/profiling_tests.cpp

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,35 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa
195195
clReleaseEvent(event);
196196
}
197197

198+
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) {
199+
MockKernel kernel(program.get(), kernelInfo, *pClDevice);
200+
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
201+
202+
size_t globalOffsets[3] = {0, 0, 0};
203+
size_t workItems[3] = {1, 1, 1};
204+
uint32_t dimensions = 1;
205+
cl_event event;
206+
cl_kernel clKernel = &kernel;
207+
208+
static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueKernel(
209+
clKernel,
210+
dimensions,
211+
globalOffsets,
212+
workItems,
213+
nullptr,
214+
0,
215+
nullptr,
216+
&event);
217+
218+
auto mockEvent = static_cast<MockEvent<Event> *>(event);
219+
EXPECT_NE(0u, mockEvent->queueTimeStamp.GPUTimeStamp);
220+
EXPECT_NE(0u, mockEvent->queueTimeStamp.CPUTimeinNS);
221+
EXPECT_LT(mockEvent->queueTimeStamp.CPUTimeinNS, mockEvent->submitTimeStamp.CPUTimeinNS);
222+
EXPECT_EQ(0u, mockEvent->submitTimeStamp.GPUTimeStamp);
223+
224+
clReleaseEvent(event);
225+
}
226+
198227
/*
199228
# One additional MI_STORE_REGISTER_MEM is expected before and after GPGPU_WALKER.
200229
*/

0 commit comments

Comments
 (0)