Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,12 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(

ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
bool CounterBasedEventEnabled) {
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
auto Cache = getEventCache(HostVisible, WithProfiling, Device);
if (CounterBasedEventEnabled) {
Cache = getCounterBasedEventCache(WithProfiling, UsingImmCmdList, Device);
}
if (Cache->empty())
return nullptr;

Expand All @@ -585,9 +588,17 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
Device = Event->UrQueue->Device;
}

auto Cache = getEventCache(Event->isHostVisible(),
Event->isProfilingEnabled(), Device);
Cache->emplace_back(Event);
if (Event->CounterBasedEventsEnabled) {
bool UsingImmediateCommandlists =
!Event->UrQueue || Event->UrQueue->UsingImmCmdLists;
auto Cache = getCounterBasedEventCache(Event->isProfilingEnabled(),
UsingImmediateCommandlists, Device);
Cache->emplace_back(Event);
} else {
auto Cache = getEventCache(Event->isHostVisible(),
Event->isProfilingEnabled(), Device);
Cache->emplace_back(Event);
}
}

ur_result_t
Expand Down
91 changes: 68 additions & 23 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ struct ur_context_handle_t_ : _ur_object {
: ZeContext{ZeContext}, Devices{Devs, Devs + NumDevices},
NumDevices{NumDevices} {
OwnNativeHandle = OwnZeContext;
for (const auto &Device : Devices) {
for (int i = 0; i < EventCacheTypeCount; i++) {
EventCaches.emplace_back();
EventCachesDeviceMap[i].insert(
std::make_pair(Device, EventCaches.size() - 1));
}
}
}

ur_context_handle_t_(ze_context_handle_t ZeContext) : ZeContext{ZeContext} {}
Expand Down Expand Up @@ -150,9 +157,10 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{
ZeEventPoolCacheTypeCount * 2};
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
ZeEventPoolCacheDeviceMap{12};
ZeEventPoolCacheDeviceMap{ZeEventPoolCacheTypeCount * 2};

// This map will be used to determine if a pool is full or not
// by storing number of empty slots available in the pool.
Expand All @@ -174,9 +182,9 @@ struct ur_context_handle_t_ : _ur_object {

// Caches for events.
using EventCache = std::vector<std::list<ur_event_handle_t>>;
EventCache EventCaches{4};
EventCache EventCaches{EventCacheTypeCount};
std::vector<std::unordered_map<ur_device_handle_t, size_t>>
EventCachesDeviceMap{4};
EventCachesDeviceMap{EventCacheTypeCount};

// Initialize the PI context.
ur_result_t initialize();
Expand Down Expand Up @@ -214,25 +222,39 @@ struct ur_context_handle_t_ : _ur_object {
ur_event_handle_t getEventFromContextCache(bool HostVisible,
bool WithProfiling,
ur_device_handle_t Device,
bool CounterBasedEventEnabled);
bool CounterBasedEventEnabled,
bool UsingImmCmdList);

// Add ur_event_handle_t to cache.
void addEventToContextCache(ur_event_handle_t);

enum EventPoolCacheType {
enum ZeEventPoolCacheType {
HostVisibleCacheType,
HostInvisibleCacheType,
HostVisibleCounterBasedRegularCacheType,
HostInvisibleCounterBasedRegularCacheType,
HostVisibleCounterBasedImmediateCacheType,
HostInvisibleCounterBasedImmediateCacheType
HostInvisibleCounterBasedImmediateCacheType,
ZeEventPoolCacheTypeCount
};

// Identifies the kinds of event caches kept by the context. The enumerators
// are used as indices into EventCaches and EventCachesDeviceMap, so their
// order determines which slot each kind of event is stored in.
enum EventCacheType {
// Selected by getEventCache() from the HostVisible and WithProfiling flags.
HostVisibleProfilingCacheType,
HostVisibleRegularCacheType,
HostInvisibleProfilingCacheType,
HostInvisibleRegularCacheType,
// Selected by getCounterBasedEventCache() from the UsingImmediateCmdList and
// WithProfiling flags.
CounterBasedImmediateCacheType,
CounterBasedRegularCacheType,
CounterBasedImmediateProfilingCacheType,
CounterBasedRegularProfilingCacheType,
// Number of cache types above; used to size EventCaches and
// EventCachesDeviceMap. Kept last so its value equals that count.
EventCacheTypeCount
};

std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
ze_device_handle_t ZeDevice) {
EventPoolCacheType CacheType;
ZeEventPoolCacheType CacheType;

calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
UsingImmediateCmdList, CacheType);
Expand All @@ -255,7 +277,7 @@ struct ur_context_handle_t_ : _ur_object {
ur_result_t calculateCacheIndex(bool HostVisible,
bool CounterBasedEventEnabled,
bool UsingImmediateCmdList,
EventPoolCacheType &CacheType) {
ZeEventPoolCacheType &CacheType) {
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
Expand Down Expand Up @@ -319,28 +341,51 @@ struct ur_context_handle_t_ : _ur_object {
if (HostVisible) {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
EventCachesMap->insert(
std::make_pair(Device, EventCaches.size() - 1));
}
WithProfiling ? &EventCachesDeviceMap[HostVisibleProfilingCacheType]
: &EventCachesDeviceMap[HostVisibleRegularCacheType];
return &EventCaches[(*EventCachesMap)[Device]];
} else {
return WithProfiling ? &EventCaches[HostVisibleProfilingCacheType]
: &EventCaches[HostVisibleRegularCacheType];
}
} else {
if (Device) {
auto EventCachesMap =
WithProfiling
? &EventCachesDeviceMap[HostInvisibleProfilingCacheType]
: &EventCachesDeviceMap[HostInvisibleRegularCacheType];
return &EventCaches[(*EventCachesMap)[Device]];
} else {
return WithProfiling ? &EventCaches[HostInvisibleProfilingCacheType]
: &EventCaches[HostInvisibleRegularCacheType];
}
}
};
auto getCounterBasedEventCache(bool WithProfiling, bool UsingImmediateCmdList,
ur_device_handle_t Device) {
if (UsingImmediateCmdList) {
if (Device) {
auto EventCachesMap =
WithProfiling
? &EventCachesDeviceMap[CounterBasedImmediateProfilingCacheType]
: &EventCachesDeviceMap[CounterBasedImmediateCacheType];
return &EventCaches[(*EventCachesMap)[Device]];
} else {
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
return WithProfiling
? &EventCaches[CounterBasedImmediateProfilingCacheType]
: &EventCaches[CounterBasedImmediateCacheType];
}
} else {
if (Device) {
auto EventCachesMap =
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
EventCaches.emplace_back();
EventCachesMap->insert(
std::make_pair(Device, EventCaches.size() - 1));
}
WithProfiling
? &EventCachesDeviceMap[CounterBasedRegularProfilingCacheType]
: &EventCachesDeviceMap[CounterBasedRegularCacheType];
return &EventCaches[(*EventCachesMap)[Device]];
} else {
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
return WithProfiling
? &EventCaches[CounterBasedRegularProfilingCacheType]
: &EventCaches[CounterBasedRegularCacheType];
}
}
}
Expand Down
20 changes: 18 additions & 2 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,22 @@ urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list
} else {
ZE2UR_CALL(zeHostSynchronize, (ZeEvent));
}
Event->Completed = true;
if (Event->CounterBasedEventsEnabled &&
Event->CommandList.value()->second.ZeFence &&
Event->CommandList.value()->second.ZeFenceInUse) {
while (true) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is a viable workaround.
A command list may have multiple events, e.g.:

kernel1 -> signal event1
kernel2 -> signal event2
finally fence signal

With this WA, waiting on event1 would also wait on kernel2, which is not acceptable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense; the issue is that there is a case where the fence is somehow signaled before the last event is signaled. Maybe we can check whether it is the last event in the command list?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, those WAs are not valid.
Either we fix the problem or we don't enable the feature on affected GPUs.

logger::debug("Event completed, checking fence status");
ze_result_t ZeResult =
ZE_CALL_NOCHECK(zeFenceQueryStatus,
(Event->CommandList.value()->second.ZeFence));
if (ZeResult == ZE_RESULT_SUCCESS) {
Event->Completed = true;
break;
}
}
} else {
Event->Completed = true;
}
}
}
if (auto Q = Event->UrQueue) {
Expand Down Expand Up @@ -1272,7 +1287,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
}

if (auto CachedEvent = Context->getEventFromContextCache(
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) {
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled,
UsingImmediateCommandlists)) {
*RetEvent = CachedEvent;
return UR_RESULT_SUCCESS;
}
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1187,7 +1187,7 @@ ur_queue_handle_t_::ur_queue_handle_t_(
return std::atoi(UrRet) != 0;
}();
this->CounterBasedEventsEnabled =
UsingImmCmdLists && isInOrderQueue() && Device->useDriverInOrderLists() &&
isInOrderQueue() && Device->useDriverInOrderLists() &&
useDriverCounterBasedEvents &&
Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound;
}
Expand Down
Loading