@@ -282,18 +282,23 @@ event queue_impl::memcpyFromDeviceGlobal(
       DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
 }
 
-sycl::detail::optional<event> queue_impl::getLastEvent() {
+sycl::detail::optional<event>
+queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
   // The external event is required to finish last if set, so it is considered
   // the last event if present.
   if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
     return ExternalEvent;
 
   std::lock_guard<std::mutex> Lock{MMutex};
-  if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
+  if (MEmpty)
     return std::nullopt;
-  if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
-    return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
-  return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
+  auto &LastEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
+                                     : MExtGraphDeps.LastEventPtr;
+  // If the event comes from a graph, we must return it.
+  if (LastEvent)
+    return detail::createSyclObjFromImpl<event>(LastEvent);
+  // We insert a marker to represent an event at end.
+  return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
 }
 
 void queue_impl::addEvent(const event &Event) {
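
The new getLastEvent(Self) path returns the recorded last event when one exists and otherwise materializes one on demand via insertMarkerEvent. Below is a minimal standalone sketch of that lazy-materialization idea, using hypothetical Queue/Event types and an insertMarker helper rather than the runtime's own classes:

#include <memory>
#include <mutex>
#include <optional>

struct Event {};

class Queue {
  std::mutex M;
  bool Empty = true;                // mirrors the new MEmpty flag
  std::shared_ptr<Event> LastEvent; // last recorded event, may be null

  // Stand-in for insertMarkerEvent(): enqueue a no-op that yields an event.
  std::shared_ptr<Event> insertMarker() { return std::make_shared<Event>(); }

public:
  // Nothing for an empty queue, the recorded event if there is one, or a
  // freshly enqueued marker when work was submitted without keeping an event.
  std::optional<std::shared_ptr<Event>> getLastEvent() {
    std::lock_guard<std::mutex> Lock(M);
    if (Empty)
      return std::nullopt;
    if (LastEvent)
      return LastEvent;
    return insertMarker();
  }
};
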
@@ -344,9 +349,49 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
 
   HandlerImpl->MEventMode = SubmitInfo.EventMode();
 
-  auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
+  auto isHostTask = Type == CGType::CodeplayHostTask;
+
+  // TODO: this shouldn't be needed but without this
+  // the legacy adapter doesn't synchronize the operations properly
+  // when non-immediate command lists are used.
+  auto isGraphSubmission = Type == CGType::ExecCommandBuffer;
+
+  auto requiresPostProcess = SubmitInfo.PostProcessorFunc() || Streams.size();
+  auto noLastEventPath = !isHostTask && !isGraphSubmission &&
+                         MNoEventMode.load(std::memory_order_relaxed) &&
+                         !requiresPostProcess;
 
-  addEvent(Event);
+  if (noLastEventPath) {
+    std::unique_lock<std::mutex> Lock(MMutex);
+
+    // Check if we are still in no event mode. There could
+    // have been a concurrent submit.
+    if (MNoEventMode.load(std::memory_order_relaxed)) {
+      return finalizeHandlerInOrderNoEventsUnlocked(Handler);
+    }
+  }
+
+  event Event;
+  if (!isInOrder()) {
+    Event = finalizeHandlerOutOfOrder(Handler);
+    addEvent(Event);
+  } else {
+    if (isHostTask) {
+      std::unique_lock<std::mutex> Lock(MMutex);
+      Event = finalizeHandlerInOrderHostTaskUnlocked(Handler);
+    } else {
+      std::unique_lock<std::mutex> Lock(MMutex);
+
+      if (!isGraphSubmission && trySwitchingToNoEventsMode()) {
+        Event = finalizeHandlerInOrderNoEventsUnlocked(Handler);
+      } else {
+        Event = finalizeHandlerInOrderWithDepsUnlocked(Handler);
+      }
+    }
+  }
+
+  if (SubmitInfo.PostProcessorFunc())
+    handlerPostProcess(Handler, SubmitInfo.PostProcessorFunc(), Event);
 
   const auto &EventImpl = detail::getSyclObjImpl(Event);
   for (auto &Stream : Streams) {
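
The fast path added above is a double-checked pattern: a relaxed atomic read of MNoEventMode outside the lock, then a re-check under MMutex because a concurrent submission may have switched the queue out of no-event mode. A compact standalone sketch of that pattern, with hypothetical InOrderQueue, submitWithoutEvent, and submitWithEvent names standing in for the runtime machinery:

#include <atomic>
#include <mutex>

class InOrderQueue {
  std::atomic<bool> NoEventMode{true}; // analogous to MNoEventMode
  std::mutex M;

  void submitWithoutEvent() { /* enqueue; no event object is created */ }
  void submitWithEvent() { /* enqueue and record an event */ }

public:
  void submit(bool needsEvent) {
    // Cheap check outside the lock; relaxed is enough here because the
    // decision is re-validated under the mutex below.
    if (!needsEvent && NoEventMode.load(std::memory_order_relaxed)) {
      std::lock_guard<std::mutex> Lock(M);
      // Re-check: a concurrent submission may have changed the mode between
      // the unlocked check and acquiring the lock.
      if (NoEventMode.load(std::memory_order_relaxed)) {
        submitWithoutEvent();
        return;
      }
    }
    // Slow path: an event is needed, so leave no-event mode and record it.
    std::lock_guard<std::mutex> Lock(M);
    NoEventMode.store(false, std::memory_order_relaxed);
    submitWithEvent();
  }
};
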
@@ -370,63 +415,14 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
 #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
 event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
                               const std::shared_ptr<queue_impl> &Self,
-                              const std::shared_ptr<queue_impl> &PrimaryQueue,
+                              const std::shared_ptr<queue_impl> &,
                               const std::shared_ptr<queue_impl> &SecondaryQueue,
                               bool CallerNeedsEvent,
                               const detail::code_location &Loc,
                               bool IsTopCodeLoc,
                               const SubmissionInfo &SubmitInfo) {
-#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
-  detail::handler_impl HandlerImplVal(PrimaryQueue.get(), CallerNeedsEvent);
-  detail::handler_impl *HandlerImpl = &HandlerImplVal;
-  handler Handler(HandlerImpl, Self);
-#else
-  handler Handler(Self, CallerNeedsEvent);
-  auto &HandlerImpl = detail::getSyclObjImpl(Handler);
-#endif
-
-#if XPTI_ENABLE_INSTRUMENTATION
-  if (xptiTraceEnabled()) {
-    Handler.saveCodeLoc(Loc, IsTopCodeLoc);
-  }
-#endif
-
-  {
-    NestedCallsTracker tracker;
-    CGF(Handler);
-  }
-
-  // Scheduler will later omit events, that are not required to execute tasks.
-  // Host and interop tasks, however, are not submitted to low-level runtimes
-  // and require separate dependency management.
-  const CGType Type = HandlerImpl->MCGType;
-  std::vector<StreamImplPtr> Streams;
-  if (Type == CGType::Kernel)
-    Streams = std::move(Handler.MStreamStorage);
-
-  HandlerImpl->MEventMode = SubmitInfo.EventMode();
-
-  auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
-
-  addEvent(Event);
-
-  const auto &EventImpl = detail::getSyclObjImpl(Event);
-  for (auto &Stream : Streams) {
-    // We don't want stream flushing to be blocking operation that is why submit
-    // a host task to print stream buffer. It will fire up as soon as the kernel
-    // finishes execution.
-    auto L = [&](handler &ServiceCGH) {
-      Stream->generateFlushCommand(ServiceCGH);
-    };
-    detail::type_erased_cgfo_ty CGF{L};
-    event FlushEvent =
-        submit_impl(CGF, Self, PrimaryQueue, SecondaryQueue,
-                    /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {});
-    EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent));
-    registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent));
-  }
-
-  return Event;
+  return submit_impl(CGF, Self, SecondaryQueue.get(), CallerNeedsEvent, Loc,
+                     IsTopCodeLoc, SubmitInfo);
 }
 #endif
 
@@ -467,24 +463,19 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
   const std::vector<event> &ExpandedDepEvents =
       getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
 
+  MEmpty = false;
+
   // If we have a command graph set we need to capture the op through the
   // handler rather than by-passing the scheduler.
   if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
                               ExpandedDepEvents, MContext)) {
-    if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
+    auto isNoEventsMode = trySwitchingToNoEventsMode();
+    if (!CallerNeedsEvent && isNoEventsMode) {
       NestedCallsTracker tracker;
       MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
                 /*PiEvent*/ nullptr);
 
-      event DiscardedEvent = createDiscardedEvent();
-      if (isInOrder()) {
-        // Store the discarded event for proper in-order dependency tracking.
-        auto &EventToStoreIn = MGraph.expired()
-                                   ? MDefaultGraphDeps.LastEventPtr
-                                   : MExtGraphDeps.LastEventPtr;
-        EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
-      }
-      return DiscardedEvent;
+      return createDiscardedEvent();
     }
 
     event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
@@ -509,7 +500,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
       }
     }
 
-    if (isInOrder()) {
+    if (isInOrder() &&
+        (!isNoEventsMode || MContext->getBackend() == backend::opencl)) {
       auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
                                               : MExtGraphDeps.LastEventPtr;
       EventToStoreIn = EventImpl;
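
In-order queues serialize work by remembering the event of the most recent submission; with the change above that bookkeeping is skipped while events are being discarded, except on OpenCL where the stored event is still needed. A rough standalone sketch of the last-event chaining itself, with hypothetical InOrderDeps/EventImpl names that are not the runtime's real types:

#include <memory>
#include <mutex>
#include <vector>

struct EventImpl {};
using EventPtr = std::shared_ptr<EventImpl>;

class InOrderDeps {
  std::mutex M;
  EventPtr LastEventPtr; // tail of the in-order dependency chain

public:
  // Record the event of the operation that was just enqueued so the next
  // submission can depend on it.
  void setLast(EventPtr E) {
    std::lock_guard<std::mutex> Lock(M);
    LastEventPtr = std::move(E);
  }

  // Dependencies for the next in-order submission: just the previous event,
  // or nothing if no event was kept.
  std::vector<EventPtr> depsForNext() {
    std::lock_guard<std::mutex> Lock(M);
    return LastEventPtr ? std::vector<EventPtr>{LastEventPtr}
                        : std::vector<EventPtr>{};
  }
};
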
@@ -637,9 +629,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
   }
 
   std::vector<std::weak_ptr<event_impl>> WeakEvents;
+  EventImplPtr LastEvent;
   {
     std::lock_guard<std::mutex> Lock(MMutex);
     WeakEvents.swap(MEventsWeak);
+    LastEvent = MDefaultGraphDeps.LastEventPtr;
 
     MMissedCleanupRequests.unset(
         [&](MissedCleanupRequestsType &MissedCleanupRequests) {
@@ -664,6 +658,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
       }
     }
   }
+
+  if (LastEvent) {
+    LastEvent->wait(LastEvent);
+  }
+
   const AdapterPtr &Adapter = getAdapter();
   Adapter->call<UrApiKind::urQueueFinish>(getHandleRef());
 
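
wait() now snapshots the recorded last event under the lock and blocks on it before asking the backend to finish the queue, which also covers work (such as host tasks) that the backend finish call cannot see. A small standalone sketch of that snapshot-then-wait shape, using std::shared_future as a stand-in for the runtime's event type and a hypothetical backendQueueFinish step:

#include <future>
#include <mutex>

class Queue {
  std::mutex M;
  std::shared_future<void> LastEvent; // stands in for the stored last event

public:
  void wait() {
    std::shared_future<void> Last;
    {
      // Snapshot the last event while holding the lock ...
      std::lock_guard<std::mutex> Lock(M);
      Last = LastEvent;
    }
    // ... then block on it outside the lock, so concurrent submissions are
    // not stalled while we wait.
    if (Last.valid())
      Last.wait();
    // backendQueueFinish(); // finally drain whatever the backend tracks
  }
};
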
@@ -755,18 +754,14 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
 }
 
 bool queue_impl::queue_empty() const {
-  // If we have in-order queue where events are not discarded then just check
-  // the status of the last event.
+  // If we have in-order queue with non-empty last event, just check its status.
   if (isInOrder()) {
     std::lock_guard<std::mutex> Lock(MMutex);
-    // If there is no last event we know that no work has been submitted, so it
-    // must be trivially empty.
-    if (!MDefaultGraphDeps.LastEventPtr)
+    if (MEmpty)
       return true;
-    // Otherwise, check if the last event is finished.
-    // Note that we fall back to the backend query if the event was discarded,
-    // which may happend despite the queue not being a discard event queue.
-    if (!MDefaultGraphDeps.LastEventPtr->isDiscarded())
+
+    if (MDefaultGraphDeps.LastEventPtr &&
+        !MDefaultGraphDeps.LastEventPtr->isDiscarded())
       return MDefaultGraphDeps.LastEventPtr
                  ->get_info<info::event::command_execution_status>() ==
              info::event_command_status::complete;
@@ -779,6 +774,11 @@ bool queue_impl::queue_empty() const {
   if (!IsReady)
     return false;
 
+  // If got here, it means that LastEventPtr is nullptr (so no possible Host
+  // Tasks) and there is nothing executing on the device.
+  if (isInOrder())
+    return true;
+
   // We may have events like host tasks which are not submitted to the backend
   // queue so we need to get their status separately.
   std::lock_guard<std::mutex> Lock(MMutex);
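
For in-order queues, queue_empty() now consults the MEmpty flag first, then the last event's status, and falls back to the backend query only when the event is missing or was discarded. A simplified standalone sketch of that decision order, with hypothetical InOrderQueue/backendQueueEmpty names rather than the real API:

#include <mutex>
#include <optional>

enum class Status { Running, Complete };

class InOrderQueue {
  mutable std::mutex M;
  bool Empty = true;                // cleared on the first submission
  std::optional<Status> LastStatus; // nullopt models a discarded/missing event

  bool backendQueueEmpty() const { return true; } // stand-in backend query

public:
  bool empty() const {
    {
      std::lock_guard<std::mutex> Lock(M);
      if (Empty)
        return true;                // nothing was ever submitted
      if (LastStatus)               // last event still available: ask it
        return *LastStatus == Status::Complete;
    }
    // Last event missing or discarded: fall back to the backend query.
    return backendQueueEmpty();
  }
};
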