Skip to content

Commit f4a9497

Browse files
authored
Merge pull request #1370 from winstonzhang-intel/counter-based-events
[L0] Support for counter-based events using L0 driver
2 parents ee07570 + 39fcb2b commit f4a9497

File tree

10 files changed

+143
-54
lines changed

10 files changed

+143
-54
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
933933
MustSignalWaitEvent = false;
934934
}
935935
}
936+
// WaitEvent was created without counter-based events enabled, so this
937+
// event can be signalled from the host.
936938
if (MustSignalWaitEvent) {
937939
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
938940
}

source/adapters/level_zero/context.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
471471

472472
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
473473
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
474-
bool ProfilingEnabled, ur_device_handle_t Device) {
474+
bool ProfilingEnabled, ur_device_handle_t Device,
475+
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
475476
// Lock while updating event pool machinery.
476477
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
477478

@@ -481,7 +482,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
481482
ZeDevice = Device->ZeDevice;
482483
}
483484
std::list<ze_event_pool_handle_t> *ZePoolCache =
484-
getZeEventPoolCache(HostVisible, ProfilingEnabled, ZeDevice);
485+
getZeEventPoolCache(HostVisible, ProfilingEnabled,
486+
CounterBasedEventEnabled, UsingImmCmdList, ZeDevice);
485487

486488
if (!ZePoolCache->empty()) {
487489
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
@@ -506,15 +508,27 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
506508
Index = 0;
507509
// Create one event ZePool per MaxNumEventsPerPool events
508510
if (*ZePool == nullptr) {
511+
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
512+
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
509513
ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc;
510514
ZeEventPoolDesc.count = MaxNumEventsPerPool;
511515
ZeEventPoolDesc.flags = 0;
516+
ZeEventPoolDesc.pNext = nullptr;
512517
if (HostVisible)
513518
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
514519
if (ProfilingEnabled)
515520
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
516521
logger::debug("ze_event_pool_desc_t flags set to: {}",
517522
ZeEventPoolDesc.flags);
523+
if (CounterBasedEventEnabled) {
524+
if (UsingImmCmdList) {
525+
counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
526+
} else {
527+
counterBasedExt.flags =
528+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
529+
}
530+
ZeEventPoolDesc.pNext = &counterBasedExt;
531+
}
518532

519533
std::vector<ze_device_handle_t> ZeDevices;
520534
if (ZeDevice) {
@@ -540,14 +554,18 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
540554
}
541555

542556
ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
543-
bool HostVisible, bool WithProfiling, ur_device_handle_t Device) {
557+
bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
558+
bool CounterBasedEventEnabled) {
544559
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
545560
auto Cache = getEventCache(HostVisible, WithProfiling, Device);
546561
if (Cache->empty())
547562
return nullptr;
548563

549564
auto It = Cache->begin();
550565
ur_event_handle_t Event = *It;
566+
if (Event->CounterBasedEventsEnabled != CounterBasedEventEnabled) {
567+
return nullptr;
568+
}
551569
Cache->erase(It);
552570
// We have to reset event before using it.
553571
Event->reset();
@@ -579,13 +597,16 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
579597
}
580598

581599
ze_device_handle_t ZeDevice = nullptr;
600+
bool UsingImmediateCommandlists =
601+
!Event->UrQueue || Event->UrQueue->UsingImmCmdLists;
582602

583603
if (!Event->IsMultiDevice && Event->UrQueue) {
584604
ZeDevice = Event->UrQueue->Device->ZeDevice;
585605
}
586606

587607
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
588-
Event->isHostVisible(), Event->isProfilingEnabled(), ZeDevice);
608+
Event->isHostVisible(), Event->isProfilingEnabled(),
609+
Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, ZeDevice);
589610

590611
// Put the empty pool to the cache of the pools.
591612
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0)
@@ -683,8 +704,8 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
683704
// Make sure to acquire the lock before checking the size, or there
684705
// will be a race condition.
685706
std::scoped_lock<ur_mutex> Lock(Queue->Context->ZeCommandListCacheMutex);
686-
// Under mutex since operator[] does insertion on the first usage for every
687-
// unique ZeDevice.
707+
// Under mutex since operator[] does insertion on the first usage for
708+
// every unique ZeDevice.
688709
auto &ZeCommandListCache =
689710
UseCopyEngine
690711
? Queue->Context->ZeCopyCommandListCache[Queue->Device->ZeDevice]

source/adapters/level_zero/context.hpp

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ struct ur_context_handle_t_ : _ur_object {
146146
// head.
147147
//
148148
// Cache of event pools to which host-visible events are added.
149-
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
149+
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
150150
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
151-
ZeEventPoolCacheDeviceMap{4};
151+
ZeEventPoolCacheDeviceMap{12};
152152

153153
// This map will be used to determine if a pool is full or not
154154
// by storing number of empty slots available in the pool.
@@ -199,48 +199,73 @@ struct ur_context_handle_t_ : _ur_object {
199199
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
200200
bool HostVisible,
201201
bool ProfilingEnabled,
202-
ur_device_handle_t Device);
202+
ur_device_handle_t Device,
203+
bool CounterBasedEventEnabled,
204+
bool UsingImmCmdList);
203205

204206
// Get ur_event_handle_t from cache.
205207
ur_event_handle_t getEventFromContextCache(bool HostVisible,
206208
bool WithProfiling,
207-
ur_device_handle_t Device);
209+
ur_device_handle_t Device,
210+
bool CounterBasedEventEnabled);
208211

209212
// Add ur_event_handle_t to cache.
210213
void addEventToContextCache(ur_event_handle_t);
211214

215+
// Identifies a family of event-pool caches by host visibility and, for
// counter-based events, by whether the pool is for regular (non-immediate)
// or immediate command lists.
// getZeEventPoolCache multiplies the enumerator value by two to pick the
// profiling slot (CacheType * 2) or the non-profiling slot
// (CacheType * 2 + 1), so the order of the enumerators fixes the slot
// layout; the six enumerators account for the 12 initial entries of
// ZeEventPoolCache and ZeEventPoolCacheDeviceMap.
enum EventPoolCacheType {
216+
HostVisibleCacheType,
217+
HostInvisibleCacheType,
218+
HostVisibleCounterBasedRegularCacheType,
219+
HostInvisibleCounterBasedRegularCacheType,
220+
HostVisibleCounterBasedImmediateCacheType,
221+
HostInvisibleCounterBasedImmediateCacheType
222+
};
223+
212224
// Returns a pointer to the list of event pools for the requested combination
// of host visibility, profiling, counter-based mode and command-list kind.
// (This description covers the added '+' lines; the '-' lines are the
// implementation being replaced.)
// The flags are first reduced to an EventPoolCacheType by
// calculateCacheIndex; profiling selects slot CacheType * 2 and non-profiling
// selects slot CacheType * 2 + 1.
// With a null ZeDevice the slot indexes ZeEventPoolCache directly. Otherwise
// the slot indexes ZeEventPoolCacheDeviceMap, and the per-device entry holds
// the position of a list that is appended to ZeEventPoolCache the first time
// that device is seen.
// NOTE(review): ZeEventPoolCache is a std::vector, so emplace_back may
// reallocate and invalidate pointers returned by earlier calls — confirm that
// callers never keep a returned pointer across another call.
// NOTE(review): the ur_result_t returned by calculateCacheIndex is discarded.
std::list<ze_event_pool_handle_t> *
213225
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
226+
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
214227
ze_device_handle_t ZeDevice) {
215-
if (HostVisible) {
216-
if (ZeDevice) {
217-
auto ZeEventPoolCacheMap = WithProfiling
218-
? &ZeEventPoolCacheDeviceMap[0]
219-
: &ZeEventPoolCacheDeviceMap[1];
220-
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
221-
ZeEventPoolCache.emplace_back();
222-
ZeEventPoolCacheMap->insert(
223-
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
224-
}
225-
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
226-
} else {
227-
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
228+
EventPoolCacheType CacheType;
229+
230+
calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
231+
UsingImmediateCmdList, CacheType);
232+
if (ZeDevice) {
233+
auto ZeEventPoolCacheMap =
234+
WithProfiling ? &ZeEventPoolCacheDeviceMap[CacheType * 2]
235+
: &ZeEventPoolCacheDeviceMap[CacheType * 2 + 1];
236+
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
237+
ZeEventPoolCache.emplace_back();
238+
ZeEventPoolCacheMap->insert(
239+
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
228240
}
241+
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
229242
} else {
230-
if (ZeDevice) {
231-
auto ZeEventPoolCacheMap = WithProfiling
232-
? &ZeEventPoolCacheDeviceMap[2]
233-
: &ZeEventPoolCacheDeviceMap[3];
234-
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
235-
ZeEventPoolCache.emplace_back();
236-
ZeEventPoolCacheMap->insert(
237-
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
238-
}
239-
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
240-
} else {
241-
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
242-
}
243+
return WithProfiling ? &ZeEventPoolCache[CacheType * 2]
244+
: &ZeEventPoolCache[CacheType * 2 + 1];
245+
}
246+
}
247+
248+
// Maps the (HostVisible, CounterBasedEventEnabled, UsingImmediateCmdList)
// combination to an EventPoolCacheType and stores it in CacheType.
// UsingImmediateCmdList only affects the result when
// CounterBasedEventEnabled is true: the first four branches cover every
// counter-based case, so the last two branches are reached only when
// counter-based events are disabled.
// Always returns UR_RESULT_SUCCESS.
ur_result_t calculateCacheIndex(bool HostVisible,
249+
bool CounterBasedEventEnabled,
250+
bool UsingImmediateCmdList,
251+
EventPoolCacheType &CacheType) {
252+
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
253+
CacheType = HostVisibleCounterBasedRegularCacheType;
254+
} else if (CounterBasedEventEnabled && !HostVisible &&
255+
!UsingImmediateCmdList) {
256+
CacheType = HostInvisibleCounterBasedRegularCacheType;
257+
} else if (CounterBasedEventEnabled && HostVisible &&
258+
UsingImmediateCmdList) {
259+
CacheType = HostVisibleCounterBasedImmediateCacheType;
260+
} else if (CounterBasedEventEnabled && !HostVisible &&
261+
UsingImmediateCmdList) {
262+
CacheType = HostInvisibleCounterBasedImmediateCacheType;
263+
} else if (!CounterBasedEventEnabled && HostVisible) {
264+
CacheType = HostVisibleCacheType;
265+
} else {
266+
CacheType = HostInvisibleCacheType;
243267
}
268+
return UR_RESULT_SUCCESS;
244269
}
245270

246271
// Decrement number of events living in the pool upon event destroy

source/adapters/level_zero/event.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
130130
if (OutEvent) {
131131
Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent);
132132

133-
ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent));
133+
if (!(*OutEvent)->CounterBasedEventsEnabled)
134+
ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent));
134135
(*OutEvent)->Completed = true;
135136
}
136137
}
@@ -766,7 +767,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate(
766767
UR_CALL(EventCreate(Context, nullptr, false, true, Event));
767768

768769
(*Event)->RefCountExternal++;
769-
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
770+
if (!(*Event)->CounterBasedEventsEnabled)
771+
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
770772
return UR_RESULT_SUCCESS;
771773
}
772774

@@ -784,7 +786,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
784786
UR_CALL(EventCreate(Context, nullptr, false, true, Event));
785787

786788
(*Event)->RefCountExternal++;
787-
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
789+
if (!(*Event)->CounterBasedEventsEnabled)
790+
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
788791
return UR_RESULT_SUCCESS;
789792
}
790793

@@ -1061,9 +1064,11 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
10611064
//
10621065
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10631066
bool IsMultiDevice, bool HostVisible,
1064-
ur_event_handle_t *RetEvent) {
1067+
ur_event_handle_t *RetEvent,
1068+
bool CounterBasedEventEnabled) {
10651069

10661070
bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();
1071+
bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists;
10671072

10681073
ur_device_handle_t Device = nullptr;
10691074

@@ -1072,7 +1077,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10721077
}
10731078

10741079
if (auto CachedEvent = Context->getEventFromContextCache(
1075-
HostVisible, ProfilingEnabled, Device)) {
1080+
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) {
10761081
*RetEvent = CachedEvent;
10771082
return UR_RESULT_SUCCESS;
10781083
}
@@ -1083,14 +1088,15 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10831088
size_t Index = 0;
10841089

10851090
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
1086-
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
1091+
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
1092+
CounterBasedEventEnabled, UsingImmediateCommandlists))
10871093
return Res;
10881094

10891095
ZeStruct<ze_event_desc_t> ZeEventDesc;
10901096
ZeEventDesc.index = Index;
10911097
ZeEventDesc.wait = 0;
10921098

1093-
if (HostVisible) {
1099+
if (HostVisible || CounterBasedEventEnabled) {
10941100
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
10951101
} else {
10961102
//
@@ -1115,7 +1121,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
11151121
} catch (...) {
11161122
return UR_RESULT_ERROR_UNKNOWN;
11171123
}
1118-
1124+
(*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled;
11191125
if (HostVisible)
11201126
(*RetEvent)->HostVisibleEvent =
11211127
reinterpret_cast<ur_event_handle_t>(*RetEvent);
@@ -1137,8 +1143,8 @@ ur_result_t ur_event_handle_t_::reset() {
11371143

11381144
if (!isHostVisible())
11391145
HostVisibleEvent = nullptr;
1140-
1141-
ZE2UR_CALL(zeEventHostReset, (ZeEvent));
1146+
if (!CounterBasedEventsEnabled)
1147+
ZE2UR_CALL(zeEventHostReset, (ZeEvent));
11421148
return UR_RESULT_SUCCESS;
11431149
}
11441150

@@ -1339,7 +1345,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
13391345

13401346
zeCommandListAppendWaitOnEvents(ZeCommandList, 1u,
13411347
&EventList[I]->ZeEvent);
1342-
zeEventHostSignal(MultiDeviceZeEvent);
1348+
if (!MultiDeviceEvent->CounterBasedEventsEnabled)
1349+
zeEventHostSignal(MultiDeviceZeEvent);
13431350

13441351
UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
13451352
/* OkToBatchCommand */ true));

source/adapters/level_zero/event.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ extern "C" {
3131
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
3232
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
3333
bool IsMultiDevice, bool HostVisible,
34-
ur_event_handle_t *RetEvent);
34+
ur_event_handle_t *RetEvent,
35+
bool CounterBasedEventEnabled = false);
3536
} // extern "C"
3637

3738
// This is an experimental option that allows to disable caching of events in
@@ -226,6 +227,8 @@ struct ur_event_handle_t_ : _ur_object {
226227
// completion batch for this event. Only used for out-of-order immediate
227228
// command lists.
228229
std::optional<ur_completion_batch_it> completionBatch;
230+
// Keeps track of whether we are using Counter-based Events.
231+
bool CounterBasedEventsEnabled = false;
229232
};
230233

231234
// Helper function to implement zeHostSynchronize.

source/adapters/level_zero/memory.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
944944
}
945945

946946
// Signal this event
947-
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
947+
if (!(*Event)->CounterBasedEventsEnabled)
948+
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
948949
(*Event)->Completed = true;
949950
return UR_RESULT_SUCCESS;
950951
}
@@ -1078,8 +1079,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
10781079
if (Buffer->MapHostPtr)
10791080
memcpy(ZeHandleDst + MapInfo.Offset, MappedPtr, MapInfo.Size);
10801081

1081-
// Signal this event
1082-
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
1082+
// Signal this event if it is not using counter-based events
1083+
if (!(*Event)->CounterBasedEventsEnabled)
1084+
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
10831085
(*Event)->Completed = true;
10841086
return UR_RESULT_SUCCESS;
10851087
}

source/adapters/level_zero/platform.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,14 @@ ur_result_t ur_platform_handle_t_::initialize() {
199199
ZeDriverModuleProgramExtensionFound = true;
200200
}
201201
}
202+
// Check whether the counter-based events extension is available.
203+
if (strncmp(extension.name, ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME,
204+
strlen(ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME) + 1) == 0) {
205+
if (extension.version ==
206+
ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_CURRENT) {
207+
ZeDriverEventPoolCountingEventsExtensionFound = true;
208+
}
209+
}
202210
zeDriverExtensionMap[extension.name] = extension.version;
203211
}
204212

source/adapters/level_zero/platform.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct ur_platform_handle_t_ : public _ur_platform {
3535
// Flags to tell whether various Level Zero platform extensions are available.
3636
bool ZeDriverGlobalOffsetExtensionFound{false};
3737
bool ZeDriverModuleProgramExtensionFound{false};
38+
bool ZeDriverEventPoolCountingEventsExtensionFound{false};
3839

3940
// Cache UR devices for reuse
4041
std::vector<std::unique_ptr<ur_device_handle_t_>> URDevicesCache;

0 commit comments

Comments
 (0)