Skip to content

Commit e64450a

Browse files
feature: unregister CSR client on Event host synchronize
Related-To: NEO-7458
Source: aea5f43
Signed-off-by: Dunajski, Bartosz <[email protected]>
1 parent bd765cc commit e64450a

File tree

10 files changed: 145 additions (+145) and 6 deletions (-6).

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
722722
template <GFXCORE_FAMILY gfxCoreFamily>
723723
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
724724
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
725+
auto signalEvent = Event::fromHandle(hSignalEvent);
726+
725727
if (inputRet == ZE_RESULT_SUCCESS) {
726728
if (isInOrderExecutionEnabled()) {
727729
auto node = this->timestampPacketContainer->peekNodes()[0];
@@ -730,14 +732,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
730732
}
731733

732734
if (this->isFlushTaskSubmissionEnabled) {
735+
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
736+
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
737+
}
733738
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
734739
} else {
735740
inputRet = executeCommandListImmediate(performMigration);
736741
}
737742
}
738743

739-
auto signalEvent = Event::fromHandle(hSignalEvent);
740-
741744
if (signalEvent) {
742745
signalEvent->setCsr(this->csr);
743746

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
5353
}
5454

5555
ze_result_t CommandQueueImp::destroy() {
56-
if (this->clientId != CommandQueue::clientNotRegistered) {
57-
this->csr->unregisterClient();
58-
}
56+
unregisterCsrClient();
5957

6058
if (commandStream.getCpuBase() != nullptr) {
6159
commandStream.replaceGraphicsAllocation(nullptr);
@@ -251,6 +249,13 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::
251249
return commandQueue;
252250
}
253251

252+
// Drops this queue's registration with its CSR, if it currently holds one.
// The client id is reset to clientNotRegistered after unregistering, so a
// second call (for example from destroy() after an event already triggered
// the unregistration) does not call csr->unregisterClient() again.
void CommandQueueImp::unregisterCsrClient() {
253+
if (getClientId() != CommandQueue::clientNotRegistered) {
254+
this->csr->unregisterClient();
255+
// Mark the queue as unregistered so the guard above rejects repeated calls.
setClientId(CommandQueue::clientNotRegistered);
256+
}
257+
}
258+
254259
// Returns the mode field (desc.mode) of the descriptor stored in this queue.
ze_command_queue_mode_t CommandQueueImp::getSynchronousMode() const {
255260
return desc.mode;
256261
}

level_zero/core/source/cmdqueue/cmdqueue.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,9 @@ struct CommandQueue : _ze_command_queue_handle_t {
6060

6161
// Returns the isCopyOnlyCommandQueue flag without modifying the queue.
bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; }
6262

63-
uint32_t getClientId() { return this->clientId; }
63+
// Returns the client id currently stored on this queue; compare against
// clientNotRegistered to tell whether the queue holds a registration.
uint32_t getClientId() const { return this->clientId; }
6464
// Overwrites the stored client id with value; only the member is assigned here.
void setClientId(uint32_t value) { this->clientId = value; }
65+
virtual void unregisterCsrClient() = 0;
6566

6667
static constexpr uint32_t clientNotRegistered = std::numeric_limits<uint32_t>::max();
6768

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ struct CommandQueueImp : public CommandQueue {
9191
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
9292
void printKernelsPrintfOutput(bool hangDetected);
9393
void checkAssert();
94+
void unregisterCsrClient() override;
9495

9596
protected:
9697
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,

level_zero/core/source/event/event.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ void Event::setIsCompleted() {
389389
if (this->isCompleted.load() == STATE_CLEARED) {
390390
this->isCompleted = STATE_SIGNALED;
391391
}
392+
unsetCmdQueue(true);
392393
}
393394

394395
void Event::enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes) {
@@ -401,4 +402,15 @@ void Event::enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSy
401402
inOrderTimestampPacket->assignAndIncrementNodesRefCounts(inOrderSyncNodes);
402403
}
403404

405+
// Remembers newCmdQ as the queue whose CSR client registration may be dropped
// once this event is known to be complete (see unsetCmdQueue).
// NOTE(review): the pointer is stored raw and non-owning; nothing visible in
// this change clears it if the queue is destroyed before the event is
// synchronized or reset - confirm the lifetime against callers.
void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) {
406+
this->latestUsedCmdQueue = newCmdQ;
407+
}
408+
409+
// Forgets the queue recorded by setLatestUsedCmdQueue.
// When unregisterClient is true and a queue is recorded, the queue is first
// asked to unregister itself from its CSR; when false the link is only dropped.
// The pointer is always cleared, so a repeated call is a no-op.
void Event::unsetCmdQueue(bool unregisterClient) {
410+
if (latestUsedCmdQueue && unregisterClient) {
411+
latestUsedCmdQueue->unregisterCsrClient();
412+
}
413+
// Cleared unconditionally, including on the "do not unregister" path.
latestUsedCmdQueue = nullptr;
414+
}
415+
404416
} // namespace L0

level_zero/core/source/event/event.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ struct EventPool;
3636
struct MetricStreamer;
3737
struct ContextImp;
3838
struct Context;
39+
struct CommandQueue;
3940
struct DriverHandle;
4041
struct DriverHandleImp;
4142
struct Device;
@@ -205,10 +206,13 @@ struct Event : _ze_event_handle_t {
205206
this->metricStreamer = metricStreamer;
206207
}
207208
void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes);
209+
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
208210

209211
protected:
210212
// Protected constructor: stores the given device, event pool and index in the
// members of the same names.
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
211213

214+
void unsetCmdQueue(bool unregisterClient);
215+
212216
uint64_t globalStartTS = 1;
213217
uint64_t globalEndTS = 1;
214218
uint64_t contextStartTS = 1;
@@ -237,6 +241,7 @@ struct Event : _ze_event_handle_t {
237241
EventPool *eventPool = nullptr;
238242
Kernel *kernelWithPrintf = nullptr;
239243
std::unique_ptr<NEO::TimestampPacketContainer> inOrderTimestampPacket;
244+
CommandQueue *latestUsedCmdQueue = nullptr;
240245

241246
uint32_t maxKernelCount = 0;
242247
uint32_t kernelCount = 1u;

level_zero/core/source/event/event_impl.inl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ void EventImp<TagSizeT>::handleSuccessfulHostSynchronization() {
161161
}
162162
}
163163
this->setIsCompleted();
164+
unsetCmdQueue(true);
164165
for (auto &csr : csrs) {
165166
csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION);
166167
}
@@ -396,6 +397,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
396397
inOrderExecEvent = false;
397398
inOrderTimestampPacket->releaseNodes();
398399
}
400+
unsetCmdQueue(false);
399401
this->resetCompletionStatus();
400402
this->resetDeviceCompletionData(false);
401403
this->l3FlushAppliedOnKernel.reset();

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "shared/source/indirect_heap/indirect_heap.h"
1111
#include "shared/source/memory_manager/internal_allocation_storage.h"
1212
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
13+
#include "shared/test/common/helpers/debug_manager_state_restore.h"
1314
#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h"
1415
#include "shared/test/common/helpers/unit_test_helper.h"
1516
#include "shared/test/common/libult/ult_command_stream_receiver.h"
@@ -1108,6 +1109,113 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
11081109
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.numCsrClients, ultCsr->getNumClients());
11091110
}
11101111

1112+
// Checks the CSR client count around three signal events used on one immediate
// command list: two that complete (count returns to 0) and one that is reset
// before synchronizing (count stays at 1).
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
1113+
ze_command_queue_desc_t desc = {};
1114+
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
1115+
ze_result_t returnValue;
1116+
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
1117+
ASSERT_NE(nullptr, commandList);
1118+
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
1119+
1120+
Mock<::L0::Kernel> kernel;
1121+
ze_group_count_t groupCount{1, 1, 1};
1122+
CmdListKernelLaunchParams launchParams = {};
1123+
1124+
// The client count is read from the command list's CSR throughout the test.
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);
1125+
1126+
ze_event_pool_desc_t eventPoolDesc = {};
1127+
eventPoolDesc.count = 3;
1128+
1129+
ze_event_desc_t eventDesc = {};
1130+
1131+
ze_event_handle_t event1 = nullptr;
1132+
ze_event_handle_t event2 = nullptr;
1133+
ze_event_handle_t event3 = nullptr;
1134+
1135+
// NOTE(review): eventPool is dereferenced below without a null check - confirm
// EventPool::create cannot fail in this fixture.
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
1136+
1137+
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event1));
1138+
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event2));
1139+
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event3));
1140+
1141+
// Nothing has been submitted yet, so no client is registered.
EXPECT_EQ(ultCsr->getNumClients(), 0u);
1142+
1143+
// Scenario 1: the event is marked completed on the host before synchronizing.
// NOTE(review): Event::setIsCompleted() itself calls unsetCmdQueue(true) in this
// change, so the count may already be 0 before zeEventHostSynchronize runs -
// confirm this is the intended coverage.
{
1144+
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event1, 0, nullptr, launchParams, false);
1145+
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1146+
1147+
Event::fromHandle(event1)->setIsCompleted();
1148+
1149+
zeEventHostSynchronize(event1, std::numeric_limits<uint64_t>::max());
1150+
EXPECT_EQ(ultCsr->getNumClients(), 0u);
1151+
}
1152+
1153+
// Scenario 2: STATE_SIGNALED is written directly to the event's host address,
// so completion is only observed inside zeEventHostSynchronize.
{
1154+
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event2, 0, nullptr, launchParams, false);
1155+
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1156+
1157+
*reinterpret_cast<uint32_t *>(Event::fromHandle(event2)->getHostAddress()) = static_cast<uint32_t>(Event::STATE_SIGNALED);
1158+
1159+
zeEventHostSynchronize(event2, std::numeric_limits<uint64_t>::max());
1160+
EXPECT_EQ(ultCsr->getNumClients(), 0u);
1161+
}
1162+
1163+
// Scenario 3: the event is reset before synchronizing with a timeout of 1; the
// client is expected to stay registered.
// NOTE(review): presumably zeEventHostReset reaches EventImp::reset(), which
// calls unsetCmdQueue(false) - verify against the API entry point.
{
1164+
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event3, 0, nullptr, launchParams, false);
1165+
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1166+
1167+
zeEventHostReset(event3);
1168+
1169+
zeEventHostSynchronize(event3, 1);
1170+
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1171+
}
1172+
1173+
zeEventDestroy(event1);
1174+
zeEventDestroy(event2);
1175+
zeEventDestroy(event3);
1176+
}
1177+
1178+
// Checks that with TrackNumCsrClientsOnSyncPoints set to 0 the CSR client stays
// registered after the signal event is completed and synchronized.
HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnregister) {
1179+
// Declared before the flag is overridden below.
// NOTE(review): presumably restores debug flags at scope exit - confirm.
DebugManagerStateRestore restore;
1180+
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
1181+
1182+
ze_command_queue_desc_t desc = {};
1183+
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
1184+
ze_result_t returnValue;
1185+
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
1186+
ASSERT_NE(nullptr, commandList);
1187+
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
1188+
1189+
Mock<::L0::Kernel> kernel;
1190+
ze_group_count_t groupCount{1, 1, 1};
1191+
CmdListKernelLaunchParams launchParams = {};
1192+
1193+
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->csr);
1194+
1195+
ze_event_pool_desc_t eventPoolDesc = {};
1196+
eventPoolDesc.count = 1;
1197+
1198+
ze_event_desc_t eventDesc = {};
1199+
1200+
ze_event_handle_t event = nullptr;
1201+
1202+
// NOTE(review): eventPool is dereferenced below without a null check - confirm
// EventPool::create cannot fail in this fixture.
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
1203+
1204+
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
1205+
1206+
EXPECT_EQ(ultCsr->getNumClients(), 0u);
1207+
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, launchParams, false);
1208+
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1209+
1210+
Event::fromHandle(event)->setIsCompleted();
1211+
1212+
zeEventHostSynchronize(event, std::numeric_limits<uint64_t>::max());
1213+
1214+
// With the flag at 0 the event never recorded the queue in flushImmediate, so
// completion and synchronization leave the registration in place.
EXPECT_EQ(ultCsr->getNumClients(), 1u);
1215+
1216+
zeEventDestroy(event);
1217+
}
1218+
11111219
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
11121220
DebugManagerStateRestore restore;
11131221
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: defaul
241241
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
242242
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
243243
DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows")
244+
DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients")
244245

245246
/*LOGGING FLAGS*/
246247
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

shared/test/common/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,4 +528,5 @@ OverrideHwIpVersion = -1
528528
PrintGlobalTimestampInNs = 0
529529
EnableDeviceStateVerification = -1
530530
VfBarResourceAllocationWa = 1
531+
TrackNumCsrClientsOnSyncPoints = -1
531532
# Please don't edit below this line

0 commit comments

Comments
 (0)