Skip to content

Commit 184ec7d

Browse files
Don't allocate HwTimeStamp when TimestampPacket is used
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent 700fad6 commit 184ec7d

File tree

4 files changed

+57
-11
lines changed

4 files changed

+57
-11
lines changed

opencl/source/command_queue/enqueue_common.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -729,7 +729,12 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
729729
if (isProfilingEnabled() && eventBuilder.getEvent()) {
730730
this->getDevice().getOSTime()->getCpuTime(&submitTimeStamp.CPUTimeinNS);
731731
eventBuilder.getEvent()->setSubmitTimeStamp(&submitTimeStamp);
732-
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwTimeStampNode()->getBaseGraphicsAllocation());
732+
733+
auto hwTimestampNode = eventBuilder.getEvent()->getHwTimeStampNode();
734+
if (hwTimestampNode) {
735+
getGpgpuCommandStreamReceiver().makeResident(*hwTimestampNode->getBaseGraphicsAllocation());
736+
}
737+
733738
if (isPerfCountersEnabled()) {
734739
getGpgpuCommandStreamReceiver().makeResident(*eventBuilder.getEvent()->getHwPerfCounterNode()->getBaseGraphicsAllocation());
735740
}

opencl/source/event/event.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2017-2020 Intel Corporation
2+
* Copyright (C) 2017-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -737,7 +737,7 @@ void Event::setEndTimeStamp() {
737737
}
738738

739739
TagNode<HwTimeStamps> *Event::getHwTimeStampNode() {
740-
if (!timeStampNode) {
740+
if (!cmdQueue->getTimestampPacketContainer() && !timeStampNode) {
741741
timeStampNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventTsAllocator()->getTag();
742742
}
743743
return timeStampNode;

opencl/test/unit_test/event/event_tests.cpp

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -866,8 +866,13 @@ HWTEST_F(InternalsEventWithPerfCountersTest, givenCpuProfilingPerfCountersPathWh
866866
event->setCPUProfilingPath(true);
867867
HwPerfCounter *perfCounter = event->getHwPerfCounterNode()->tagForCpuAccess;
868868
ASSERT_NE(nullptr, perfCounter);
869-
HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess;
870-
ASSERT_NE(nullptr, timeStamps);
869+
870+
auto hwTimeStampNode = event->getHwTimeStampNode();
871+
if (pCmdQ->getTimestampPacketContainer()) {
872+
EXPECT_EQ(nullptr, hwTimeStampNode);
873+
} else {
874+
ASSERT_NE(nullptr, hwTimeStampNode->tagForCpuAccess);
875+
}
871876

872877
event->setCommand(std::unique_ptr<Command>(new CommandWithoutKernel(*pCmdQ)));
873878

@@ -1109,8 +1114,12 @@ TEST_F(EventTest, GivenNoQueueWhenSettingCpuTimeStampThenTimesIsNotSet) {
11091114
EXPECT_EQ(0ULL, outCPUtimeStamp);
11101115
}
11111116

1112-
TEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {
1113-
std::unique_ptr<Event> event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0));
1117+
HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {
1118+
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
1119+
1120+
auto myCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
1121+
1122+
std::unique_ptr<Event> event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
11141123
ASSERT_NE(nullptr, event);
11151124

11161125
HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess;
@@ -1130,8 +1139,12 @@ TEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {
11301139
ASSERT_EQ(timeStamps, timeStamps2);
11311140
}
11321141

1133-
TEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) {
1134-
std::unique_ptr<Event> event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0));
1142+
HWTEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) {
1143+
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
1144+
1145+
auto myCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
1146+
1147+
std::unique_ptr<Event> event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
11351148
ASSERT_NE(nullptr, event);
11361149

11371150
GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation();
@@ -1144,8 +1157,12 @@ TEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) {
11441157
EXPECT_GT(memoryStorageSize, 0u);
11451158
}
11461159

1147-
TEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) {
1148-
std::unique_ptr<Event> event(new Event(this->pCmdQ, CL_COMMAND_COPY_BUFFER, 0, 0));
1160+
HWTEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAllocation) {
1161+
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
1162+
1163+
auto myCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
1164+
1165+
std::unique_ptr<Event> event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
11491166
ASSERT_NE(nullptr, event);
11501167

11511168
HwTimeStamps *timeStamps = event->getHwTimeStampNode()->tagForCpuAccess;

opencl/test/unit_test/profiling/profiling_tests.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -888,6 +888,8 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount
888888

889889
template <typename TagType>
890890
struct FixedGpuAddressTagAllocator : TagAllocator<TagType> {
891+
using TagAllocator<TagType>::usedTags;
892+
using TagAllocator<TagType>::deferredTags;
891893

892894
struct MockTagNode : TagNode<TagType> {
893895
void setGpuAddress(uint64_t value) { this->gpuAddress = value; }
@@ -936,6 +938,28 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit
936938
clReleaseEvent(event);
937939
}
938940

941+
HWTEST_F(ProfilingWithPerfCountersTests, givenTimestampPacketsEnabledWhenEnqueueIsCalledThenDontAllocateHwTimeStamps) {
942+
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
943+
csr.timestampPacketWriteEnabled = true;
944+
945+
auto mockAllocator = new FixedGpuAddressTagAllocator<HwTimeStamps>(csr, 0x123);
946+
csr.profilingTimeStampAllocator.reset(mockAllocator);
947+
948+
auto myCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice.get(), nullptr);
949+
myCmdQ->setProfilingEnabled();
950+
951+
size_t globalOffsets[3] = {0, 0, 0};
952+
size_t workItems[3] = {1, 1, 1};
953+
cl_event event;
954+
955+
myCmdQ->enqueueKernel(kernel->mockKernel, 1, globalOffsets, workItems, nullptr, 0, nullptr, &event);
956+
957+
EXPECT_EQ(!!myCmdQ->getTimestampPacketContainer(), mockAllocator->usedTags.peekIsEmpty());
958+
EXPECT_TRUE(mockAllocator->deferredTags.peekIsEmpty());
959+
960+
clReleaseEvent(event);
961+
}
962+
939963
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersOnCCSTests, givenCommandQueueBlockedWithProfilingPerfCountersWhenWalkerIsDispatchedThenPipeControlWithTimeStampIsPresentInCS) {
940964
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
941965
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;

0 commit comments

Comments
 (0)