Skip to content

Commit 418a885

Browse files
Skip timestamp wait when enqueueing a non-kernel operation
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent 95585a8 commit 418a885

File tree

4 files changed

+48
-1
lines changed

4 files changed

+48
-1
lines changed

opencl/source/command_queue/command_queue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
401401
QueuePriority priority = QueuePriority::MEDIUM;
402402
QueueThrottle throttle = QueueThrottle::MEDIUM;
403403
EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::None;
404+
bool wasNonKernelOperationSent = false;
404405
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
405406
std::array<CopyEngineState, bcsInfoMaskSize> bcsStates = {};
406407

opencl/source/command_queue/command_queue_hw_base.inl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,14 +159,16 @@ bool CommandQueueHw<Family>::waitForTimestamps(uint32_t taskCount) {
159159
using TSPacketType = typename Family::TimestampPacketType;
160160
bool waited = false;
161161

162-
if (isWaitForTimestampsEnabled()) {
162+
if (isWaitForTimestampsEnabled() && !this->wasNonKernelOperationSent) {
163163
waited = waitForTimestampsWithinContainer<TSPacketType>(timestampPacketContainer.get(), getGpgpuCommandStreamReceiver());
164164

165165
if (isOOQEnabled()) {
166166
waited |= waitForTimestampsWithinContainer<TSPacketType>(deferredTimestampPackets.get(), getGpgpuCommandStreamReceiver());
167167
}
168168
}
169169

170+
this->wasNonKernelOperationSent = false;
171+
170172
return waited;
171173
}
172174

opencl/source/command_queue/enqueue_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
301301

302302
bool migratedMemory = false;
303303

304+
this->wasNonKernelOperationSent |= enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel;
305+
304306
if (!blockQueue && multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) {
305307
for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) {
306308
MigrationController::handleMigration(*this->context, computeCommandStreamReceiver, arg.second);

opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,48 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenCallWaitUti
911911
cmdQ.reset();
912912
}
913913

914+
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitAndNonGpuKernelEnqueueWhenFinishThenDoNotCallWaitUtils) {
915+
DebugManagerStateRestore restorer;
916+
DebugManager.flags.UpdateTaskCountFromWait.set(3);
917+
DebugManager.flags.EnableTimestampWait.set(1);
918+
919+
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
920+
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
921+
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
922+
923+
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
924+
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
925+
926+
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
927+
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
928+
cmdQ->enqueueMarkerWithWaitList(0, nullptr, nullptr);
929+
cmdQ->flush();
930+
931+
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
932+
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
933+
934+
VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
935+
VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
936+
VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
937+
VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
938+
939+
deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u);
940+
timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u);
941+
942+
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile uint32_t *>(const_cast<void *>(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u)));
943+
CpuIntrinsicsTests::pauseValue = 2u;
944+
CpuIntrinsicsTests::setupPauseAddress = [&]() {
945+
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile uint32_t *>(const_cast<void *>(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u)));
946+
};
947+
CpuIntrinsicsTests::pauseCounter = 0u;
948+
949+
cmdQ->finish();
950+
951+
EXPECT_EQ(0u, CpuIntrinsicsTests::pauseCounter);
952+
953+
cmdQ.reset();
954+
}
955+
914956
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingToOoqThenMoveToDeferredList) {
915957
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
916958

0 commit comments

Comments
 (0)