@@ -171,48 +171,63 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
171171 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
172172
173173 // Helper function for appending a barrier to a command list.
174- auto insertBarrierIntoCmdList =
175- [&Queue](ur_command_list_ptr_t CmdList,
176- const _ur_ze_event_list_t &EventWaitList,
177- ur_event_handle_t &Event, bool IsInternal) {
178- UR_CALL (createEventAndAssociateQueue (
179- Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList,
180- IsInternal, false ));
181-
182- Event->WaitList = EventWaitList;
183-
184- // For in-order queue we don't need a real barrier, just wait for
185- // requested events in potentially different queues and add a "barrier"
186- // event signal because it is already guaranteed that previous commands
187- // in this queue are completed when the signal is started.
188- //
189- // Only consideration here is that when profiling is used, signalEvent
190- // cannot be used if EventWaitList.Lenght == 0. In those cases, we need
191- // to fallback directly to barrier to have correct timestamps. See here:
192- // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
193- //
194- // TODO: this and other special handling of in-order queues to be
195- // updated when/if Level Zero adds native support for in-order queues.
196- //
197- if (Queue->isInOrderQueue () && InOrderBarrierBySignal &&
198- !Queue->isProfilingEnabled ()) {
199- // If we are using driver in order lists, then append wait on events
200- // is unnecessary and we can signal the event created.
201- if (EventWaitList.Length && !CmdList->second .IsInOrderList ) {
202- ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
203- (CmdList->first , EventWaitList.Length ,
204- EventWaitList.ZeEventList ));
174+ auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList,
175+ _ur_ze_event_list_t &EventWaitList,
176+ ur_event_handle_t &Event,
177+ bool IsInternal) {
178+ UR_CALL (createEventAndAssociateQueue (Queue, &Event,
179+ UR_COMMAND_EVENTS_WAIT_WITH_BARRIER,
180+ CmdList, IsInternal, false ));
181+
182+ Event->WaitList = EventWaitList;
183+
184+ // For in-order queue we don't need a real barrier, just wait for
185+ // requested events in potentially different queues and add a "barrier"
186+ // event signal because it is already guaranteed that previous commands
187+ // in this queue are completed when the signal is started.
188+ //
189+ // Only consideration here is that when profiling is used, signalEvent
190+ // cannot be used if EventWaitList.Lenght == 0. In those cases, we need
191+ // to fallback directly to barrier to have correct timestamps. See here:
192+ // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
193+ //
194+ // TODO: this and other special handling of in-order queues to be
195+ // updated when/if Level Zero adds native support for in-order queues.
196+ //
197+ if (Queue->isInOrderQueue () && InOrderBarrierBySignal &&
198+ !Queue->isProfilingEnabled ()) {
199+ // If we are using driver in order lists, then append wait on events
200+ // is unnecessary IF the cmdlists match.
201+ if (EventWaitList.Length ) {
202+ if (CmdList->second .IsInOrderList ) {
203+ for (unsigned i = EventWaitList.Length ; i-- < 0 ;) {
204+ // if the events is from the same cmdlist, we can remove it
205+ // from the waitlist.
206+ if (EventWaitList.UrEventList [i]->CommandList == CmdList) {
207+ EventWaitList.Length --;
208+ if (EventWaitList.Length != i) {
209+ std::swap (EventWaitList.UrEventList [i],
210+ EventWaitList.UrEventList [EventWaitList.Length ]);
211+ std::swap (EventWaitList.ZeEventList [i],
212+ EventWaitList.ZeEventList [EventWaitList.Length ]);
213+ }
214+ }
205215 }
206- ZE2UR_CALL (zeCommandListAppendSignalEvent,
207- (CmdList->first , Event->ZeEvent ));
208- } else {
209- ZE2UR_CALL (zeCommandListAppendBarrier,
210- (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
211- EventWaitList.ZeEventList ));
212216 }
217+ ZE2UR_CALL (
218+ zeCommandListAppendWaitOnEvents,
219+ (CmdList->first , EventWaitList.Length , EventWaitList.ZeEventList ));
220+ }
221+ ZE2UR_CALL (zeCommandListAppendSignalEvent,
222+ (CmdList->first , Event->ZeEvent ));
223+ } else {
224+ ZE2UR_CALL (zeCommandListAppendBarrier,
225+ (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
226+ EventWaitList.ZeEventList ));
227+ }
213228
214- return UR_RESULT_SUCCESS;
215- };
229+ return UR_RESULT_SUCCESS;
230+ };
216231
217232 // If the queue is in-order then each command in it effectively acts as a
218233 // barrier, so we don't need to do anything except if we were requested
@@ -349,9 +364,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
349364 // command-lists.
350365 std::vector<ur_event_handle_t > EventWaitVector (CmdLists.size ());
351366 for (size_t I = 0 ; I < CmdLists.size (); ++I) {
352- UR_CALL ( insertBarrierIntoCmdList (CmdLists[I], _ur_ze_event_list_t {},
353- EventWaitVector[I],
354- true /* IsInternal*/ ));
367+ _ur_ze_event_list_t waitlist;
368+ UR_CALL ( insertBarrierIntoCmdList (
369+ CmdLists[I], waitlist, EventWaitVector[I], true /* IsInternal*/ ));
355370 }
356371 // If there were multiple queues we need to create a "convergence" event to
357372 // be our active barrier. This convergence event is signalled by a barrier
@@ -376,8 +391,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
376391 // If there is only a single queue then insert a barrier and the single
377392 // result event can be used as our active barrier and used as the return
378393 // event. Take into account whether output event is discarded or not.
379- UR_CALL (insertBarrierIntoCmdList (CmdLists[0 ], _ur_ze_event_list_t {},
380- ResultEvent, IsInternal));
394+ _ur_ze_event_list_t waitlist;
395+ UR_CALL (insertBarrierIntoCmdList (CmdLists[0 ], waitlist, ResultEvent,
396+ IsInternal));
381397 }
382398
383399 // Execute each command list so the barriers can be encountered.
0 commit comments