Skip to content

Commit 4a905ca

Browse files
[SYCL] Cherry-pick fixes for UR L0 v1 adapter memory leaks (#20253)
This is a joint cherry-pick of #18325 and #19827 --- [UR][L0] Event pool cache leak fix (#18325) addresses an event pool leak when SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING is set to 1 --- [UR][L0] Event cleanup in urEnqueueKernelLaunch (#19827) When using internal events, we do not clean up after execution. In the case of repeated calls to urEnqueueKernelLaunch, we eventually return UR_RESULT_ERROR_OUT_OF_RESOURCES. --------- Patch-by: Zhang, Winston <[email protected]>
1 parent 139c14f commit 4a905ca

File tree

5 files changed

+117
-22
lines changed

5 files changed

+117
-22
lines changed

unified-runtime/source/adapters/level_zero/context.cpp

Lines changed: 99 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -406,15 +406,89 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
406406
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
407407
bool ProfilingEnabled, ur_device_handle_t Device,
408408
bool CounterBasedEventEnabled, bool UsingImmCmdList,
409-
bool InterruptBasedEventEnabled) {
410-
// Lock while updating event pool machinery.
411-
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
409+
bool InterruptBasedEventEnabled, ur_queue_handle_t Queue, bool IsInternal) {
412410

413411
ze_device_handle_t ZeDevice = nullptr;
414-
415412
if (Device) {
416413
ZeDevice = Device->ZeDevice;
417414
}
415+
416+
if (DisableEventsCaching) {
417+
// Skip all cache handling, always create a new pool
418+
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
419+
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0};
420+
421+
ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
422+
ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0};
423+
eventSyncMode.syncModeFlags =
424+
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
425+
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;
426+
427+
ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc;
428+
ZeEventPoolDesc.count = MaxNumEventsPerPool;
429+
ZeEventPoolDesc.flags = 0;
430+
ZeEventPoolDesc.pNext = nullptr;
431+
if (HostVisible)
432+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
433+
if (ProfilingEnabled)
434+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
435+
UR_LOG(DEBUG, "ze_event_pool_desc_t flags set to: {}",
436+
ZeEventPoolDesc.flags);
437+
if (CounterBasedEventEnabled) {
438+
if (UsingImmCmdList) {
439+
counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
440+
} else {
441+
counterBasedExt.flags =
442+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
443+
}
444+
UR_LOG(DEBUG, "ze_event_pool_desc_t counter based flags set to: {}",
445+
counterBasedExt.flags);
446+
if (InterruptBasedEventEnabled) {
447+
counterBasedExt.pNext = &eventSyncMode;
448+
}
449+
ZeEventPoolDesc.pNext = &counterBasedExt;
450+
} else if (InterruptBasedEventEnabled) {
451+
ZeEventPoolDesc.pNext = &eventSyncMode;
452+
}
453+
454+
std::vector<ze_device_handle_t> ZeDevices;
455+
if (ZeDevice) {
456+
ZeDevices.push_back(ZeDevice);
457+
} else {
458+
std::for_each(Devices.begin(), Devices.end(),
459+
[&](const ur_device_handle_t &D) {
460+
ZeDevices.push_back(D->ZeDevice);
461+
});
462+
}
463+
464+
ze_result_t Result = ZE_CALL_NOCHECK(
465+
zeEventPoolCreate,
466+
(ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], &Pool));
467+
if (IsInternal && ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES &&
468+
Queue) {
469+
if (!Queue->isInOrderQueue()) {
470+
if (Queue->UsingImmCmdLists) {
471+
UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/,
472+
false /*QueueSynced*/,
473+
nullptr /*CompletedEvent*/));
474+
} else {
475+
UR_CALL(resetCommandLists(Queue));
476+
}
477+
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
478+
ZeDevices.size(), &ZeDevices[0], &Pool));
479+
}
480+
} else if (ze2urResult(Result) != UR_RESULT_SUCCESS) {
481+
return ze2urResult(Result);
482+
}
483+
Index = 0;
484+
NumEventsAvailableInEventPool[Pool] = MaxNumEventsPerPool - 1;
485+
NumEventsUnreleasedInEventPool[Pool] = 1;
486+
return UR_RESULT_SUCCESS;
487+
}
488+
489+
// --- Normal cache-based logic below ---
490+
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
491+
418492
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
419493
HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList,
420494
InterruptBasedEventEnabled, ZeDevice);
@@ -423,6 +497,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
423497
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
424498
if (DisableEventsCaching) {
425499
// Remove full pool from the cache if events caching is disabled.
500+
ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePoolCache->front()));
426501
ZePoolCache->erase(ZePoolCache->begin());
427502
} else {
428503
// If event caching is enabled then we don't destroy events so there is
@@ -488,8 +563,26 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
488563
});
489564
}
490565

491-
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
492-
ZeDevices.size(), &ZeDevices[0], ZePool));
566+
ze_result_t Result = ZE_CALL_NOCHECK(
567+
zeEventPoolCreate,
568+
(ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], ZePool));
569+
if (IsInternal && ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES &&
570+
Queue) {
571+
if (!Queue->isInOrderQueue()) {
572+
if (Queue->UsingImmCmdLists) {
573+
UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/,
574+
false /*QueueSynced*/,
575+
nullptr /*CompletedEvent*/));
576+
} else {
577+
UR_CALL(resetCommandLists(Queue));
578+
}
579+
ZE2UR_CALL(zeEventPoolCreate,
580+
(ZeContext, &ZeEventPoolDesc, ZeDevices.size(),
581+
&ZeDevices[0], ZePool));
582+
}
583+
} else if (ze2urResult(Result) != UR_RESULT_SUCCESS) {
584+
return ze2urResult(Result);
585+
}
493586
NumEventsAvailableInEventPool[*ZePool] = MaxNumEventsPerPool - 1;
494587
NumEventsUnreleasedInEventPool[*ZePool] = 1;
495588
} else {

unified-runtime/source/adapters/level_zero/context.hpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -202,13 +202,12 @@ struct ur_context_handle_t_ : ur_object {
202202
// pool then create new one. The HostVisible parameter tells if we need a
203203
// slot for a host-visible event. The ProfilingEnabled tells is we need a
204204
// slot for an event with profiling capabilities.
205-
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
206-
bool HostVisible,
207-
bool ProfilingEnabled,
208-
ur_device_handle_t Device,
209-
bool CounterBasedEventEnabled,
210-
bool UsingImmCmdList,
211-
bool InterruptBasedEventEnabled);
205+
ur_result_t getFreeSlotInExistingOrNewPool(
206+
ze_event_pool_handle_t &, size_t &, bool HostVisible,
207+
bool ProfilingEnabled, ur_device_handle_t Device,
208+
bool CounterBasedEventEnabled, bool UsingImmCmdList,
209+
bool InterruptBasedEventEnabled, ur_queue_handle_t Queue,
210+
bool IsInternal);
212211

213212
// Get ur_event_handle_t from cache.
214213
ur_event_handle_t getEventFromContextCache(bool HostVisible,

unified-runtime/source/adapters/level_zero/event.cpp

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1349,16 +1349,18 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
13491349
// The "HostVisible" argument specifies if event needs to be allocated from
13501350
// a host-visible pool.
13511351
//
1352-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
1353-
bool IsMultiDevice, bool HostVisible,
1354-
ur_event_handle_t *RetEvent,
1355-
bool CounterBasedEventEnabled,
1356-
bool ForceDisableProfiling,
1357-
bool InterruptBasedEventEnabled) {
1352+
ur_result_t
1353+
EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
1354+
bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent,
1355+
bool CounterBasedEventEnabled, bool ForceDisableProfiling,
1356+
bool InterruptBasedEventEnabled, std::optional<bool> IsInternal) {
13581357
bool ProfilingEnabled =
13591358
ForceDisableProfiling ? false : (!Queue || Queue->isProfilingEnabled());
13601359
bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists;
13611360

1361+
// Handle optional IsInternal parameter - default to false if not provided
1362+
bool isInternalValue = IsInternal.value_or(false);
1363+
13621364
ur_device_handle_t Device = nullptr;
13631365

13641366
if (!IsMultiDevice && Queue) {
@@ -1380,7 +1382,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
13801382
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
13811383
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
13821384
CounterBasedEventEnabled, UsingImmediateCommandlists,
1383-
InterruptBasedEventEnabled))
1385+
InterruptBasedEventEnabled, Queue, isInternalValue))
13841386
return Res;
13851387

13861388
ZeStruct<ze_event_desc_t> ZeEventDesc;

unified-runtime/source/adapters/level_zero/event.hpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
3737
ur_event_handle_t *RetEvent,
3838
bool CounterBasedEventEnabled,
3939
bool ForceDisableProfiling,
40-
bool InterruptBasedEventEnabled);
40+
bool InterruptBasedEventEnabled,
41+
std::optional<bool> IsInternal = std::nullopt);
4142
} // extern "C"
4243

4344
// This is an experimental option that allows to disable caching of events in

unified-runtime/source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1904,7 +1904,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
19041904
UR_CALL(EventCreate(
19051905
Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event,
19061906
Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/,
1907-
Queue->InterruptBasedEventsEnabled));
1907+
Queue->InterruptBasedEventsEnabled, IsInternal));
19081908

19091909
(*Event)->UrQueue = Queue;
19101910
(*Event)->CommandType = CommandType;

0 commit comments

Comments
 (0)