diff --git a/unified-runtime/source/adapters/level_zero/context.cpp b/unified-runtime/source/adapters/level_zero/context.cpp index fe690f3673934..4a89494bb523a 100644 --- a/unified-runtime/source/adapters/level_zero/context.cpp +++ b/unified-runtime/source/adapters/level_zero/context.cpp @@ -406,15 +406,89 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible, bool ProfilingEnabled, ur_device_handle_t Device, bool CounterBasedEventEnabled, bool UsingImmCmdList, - bool InterruptBasedEventEnabled) { - // Lock while updating event pool machinery. - std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex); + bool InterruptBasedEventEnabled, ur_queue_handle_t Queue, bool IsInternal) { ze_device_handle_t ZeDevice = nullptr; - if (Device) { ZeDevice = Device->ZeDevice; } + + if (DisableEventsCaching) { + // Skip all cache handling, always create a new pool + ze_event_pool_counter_based_exp_desc_t counterBasedExt = { + ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC, nullptr, 0}; + + ze_intel_event_sync_mode_exp_desc_t eventSyncMode = { + ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0}; + eventSyncMode.syncModeFlags = + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT | + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT; + + ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc; + ZeEventPoolDesc.count = MaxNumEventsPerPool; + ZeEventPoolDesc.flags = 0; + ZeEventPoolDesc.pNext = nullptr; + if (HostVisible) + ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + if (ProfilingEnabled) + ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + UR_LOG(DEBUG, "ze_event_pool_desc_t flags set to: {}", + ZeEventPoolDesc.flags); + if (CounterBasedEventEnabled) { + if (UsingImmCmdList) { + counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE; + } else { + counterBasedExt.flags = + ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE; + } + UR_LOG(DEBUG, "ze_event_pool_desc_t counter based flags set to: {}", + counterBasedExt.flags);
+ if (InterruptBasedEventEnabled) { + counterBasedExt.pNext = &eventSyncMode; + } + ZeEventPoolDesc.pNext = &counterBasedExt; + } else if (InterruptBasedEventEnabled) { + ZeEventPoolDesc.pNext = &eventSyncMode; + } + + std::vector<ze_device_handle_t> ZeDevices; + if (ZeDevice) { + ZeDevices.push_back(ZeDevice); + } else { + std::for_each(Devices.begin(), Devices.end(), + [&](const ur_device_handle_t &D) { + ZeDevices.push_back(D->ZeDevice); + }); + } + + ze_result_t Result = ZE_CALL_NOCHECK( + zeEventPoolCreate, + (ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], &Pool)); + if (IsInternal && ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES && + Queue) { + if (!Queue->isInOrderQueue()) { + if (Queue->UsingImmCmdLists) { + UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/, + false /*QueueSynced*/, + nullptr /*CompletedEvent*/)); + } else { + UR_CALL(resetCommandLists(Queue)); + } + ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc, + ZeDevices.size(), &ZeDevices[0], &Pool)); + } + } else if (ze2urResult(Result) != UR_RESULT_SUCCESS) { + return ze2urResult(Result); + } + Index = 0; + NumEventsAvailableInEventPool[Pool] = MaxNumEventsPerPool - 1; + NumEventsUnreleasedInEventPool[Pool] = 1; + return UR_RESULT_SUCCESS; + } + + // --- Normal cache-based logic below --- + std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex); + std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache( HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList, InterruptBasedEventEnabled, ZeDevice); @@ -423,6 +497,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) { if (DisableEventsCaching) { // Remove full pool from the cache if events caching is disabled. 
+ ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePoolCache->front())); ZePoolCache->erase(ZePoolCache->begin()); } else { // If event caching is enabled then we don't destroy events so there is @@ -488,8 +563,26 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( }); } - ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc, - ZeDevices.size(), &ZeDevices[0], ZePool)); + ze_result_t Result = ZE_CALL_NOCHECK( + zeEventPoolCreate, + (ZeContext, &ZeEventPoolDesc, ZeDevices.size(), &ZeDevices[0], ZePool)); + if (IsInternal && ze2urResult(Result) == UR_RESULT_ERROR_OUT_OF_RESOURCES && + Queue) { + if (!Queue->isInOrderQueue()) { + if (Queue->UsingImmCmdLists) { + UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/, + false /*QueueSynced*/, + nullptr /*CompletedEvent*/)); + } else { + UR_CALL(resetCommandLists(Queue)); + } + ZE2UR_CALL(zeEventPoolCreate, + (ZeContext, &ZeEventPoolDesc, ZeDevices.size(), + &ZeDevices[0], ZePool)); + } + } else if (ze2urResult(Result) != UR_RESULT_SUCCESS) { + return ze2urResult(Result); + } NumEventsAvailableInEventPool[*ZePool] = MaxNumEventsPerPool - 1; NumEventsUnreleasedInEventPool[*ZePool] = 1; } else { diff --git a/unified-runtime/source/adapters/level_zero/context.hpp b/unified-runtime/source/adapters/level_zero/context.hpp index fbcbceb71b7f0..203f755fc0bdc 100644 --- a/unified-runtime/source/adapters/level_zero/context.hpp +++ b/unified-runtime/source/adapters/level_zero/context.hpp @@ -202,13 +202,12 @@ struct ur_context_handle_t_ : ur_object { // pool then create new one. The HostVisible parameter tells if we need a // slot for a host-visible event. The ProfilingEnabled tells is we need a // slot for an event with profiling capabilities. 
- ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &, - bool HostVisible, - bool ProfilingEnabled, - ur_device_handle_t Device, - bool CounterBasedEventEnabled, - bool UsingImmCmdList, - bool InterruptBasedEventEnabled); + ur_result_t getFreeSlotInExistingOrNewPool( + ze_event_pool_handle_t &, size_t &, bool HostVisible, + bool ProfilingEnabled, ur_device_handle_t Device, + bool CounterBasedEventEnabled, bool UsingImmCmdList, + bool InterruptBasedEventEnabled, ur_queue_handle_t Queue, + bool IsInternal); // Get ur_event_handle_t from cache. ur_event_handle_t getEventFromContextCache(bool HostVisible, diff --git a/unified-runtime/source/adapters/level_zero/event.cpp b/unified-runtime/source/adapters/level_zero/event.cpp index e1834376f0b41..7e71058dda7d3 100644 --- a/unified-runtime/source/adapters/level_zero/event.cpp +++ b/unified-runtime/source/adapters/level_zero/event.cpp @@ -1349,16 +1349,18 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // The "HostVisible" argument specifies if event needs to be allocated from // a host-visible pool. // -ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, - bool IsMultiDevice, bool HostVisible, - ur_event_handle_t *RetEvent, - bool CounterBasedEventEnabled, - bool ForceDisableProfiling, - bool InterruptBasedEventEnabled) { +ur_result_t +EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, + bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, + bool CounterBasedEventEnabled, bool ForceDisableProfiling, + bool InterruptBasedEventEnabled, std::optional<bool> IsInternal) { bool ProfilingEnabled = ForceDisableProfiling ? 
false : (!Queue || Queue->isProfilingEnabled()); bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists; + // Handle optional IsInternal parameter - default to false if not provided + bool isInternalValue = IsInternal.value_or(false); + ur_device_handle_t Device = nullptr; if (!IsMultiDevice && Queue) { @@ -1380,7 +1382,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, if (auto Res = Context->getFreeSlotInExistingOrNewPool( ZeEventPool, Index, HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled, UsingImmediateCommandlists, - InterruptBasedEventEnabled)) + InterruptBasedEventEnabled, Queue, isInternalValue)) return Res; ZeStruct<ze_event_desc_t> ZeEventDesc; diff --git a/unified-runtime/source/adapters/level_zero/event.hpp b/unified-runtime/source/adapters/level_zero/event.hpp index c89ee5097c8e8..7e5efa87bb6f2 100644 --- a/unified-runtime/source/adapters/level_zero/event.hpp +++ b/unified-runtime/source/adapters/level_zero/event.hpp @@ -37,7 +37,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, bool ForceDisableProfiling, - bool InterruptBasedEventEnabled); + bool InterruptBasedEventEnabled, + std::optional<bool> IsInternal = std::nullopt); } // extern "C" // This is an experimental option that allows to disable caching of events in diff --git a/unified-runtime/source/adapters/level_zero/queue.cpp b/unified-runtime/source/adapters/level_zero/queue.cpp index 4cb06f1348b7f..4dcd745eec0ac 100644 --- a/unified-runtime/source/adapters/level_zero/queue.cpp +++ b/unified-runtime/source/adapters/level_zero/queue.cpp @@ -1904,7 +1904,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, UR_CALL(EventCreate( Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/, - Queue->InterruptBasedEventsEnabled, IsInternal)); 
(*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType;