@@ -781,79 +781,121 @@ class queue_impl {
781781 return ResEvent;
782782 }
783783
784- // template is needed for proper unit testing
785784 template <typename HandlerType = handler>
786- void finalizeHandler (HandlerType &Handler, event &EventRet) {
787- if (MIsInorder) {
788- // Accessing and changing of an event isn't atomic operation.
789- // Hence, here is the lock for thread-safety.
790- std::lock_guard<std::mutex> Lock{MMutex};
791-
792- auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
793- : MExtGraphDeps.LastEventPtr ;
794-
795- // This dependency is needed for the following purposes:
796- // - host tasks are handled by the runtime and cannot be implicitly
797- // synchronized by the backend.
798- // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
799- // by a host task. This dependency allows to build the enqueue order in
800- // the RT but will not be passed to the backend. See getPIEvents in
801- // Command.
802- if (EventToBuildDeps) {
803- // In the case where the last event was discarded and we are to run a
804- // host_task, we insert a barrier into the queue and use the resulting
805- // event as the dependency for the host_task.
806- // Note that host_task events can never be discarded, so this will not
807- // insert barriers between host_task enqueues.
808- if (EventToBuildDeps->isDiscarded () &&
809- getSyclObjImpl (Handler)->MCGType == CGType::CodeplayHostTask)
810- EventToBuildDeps = insertHelperBarrier (Handler);
811-
812- if (!EventToBuildDeps->isDiscarded ())
813- Handler.depends_on (EventToBuildDeps);
814- }
// Finalizes Handler on the path the visible callers take when MIsInorder is
// set: adds the recorded last event (if any) and a pending external event (if
// any) as dependencies, calls Handler.finalize(), and stores the resulting
// event as the new last event.
// Returns the event produced by Handler.finalize().
785+ event finalizeHandlerInOrder (HandlerType &Handler) {
786+ // Reading and updating the last-event pointer is not an atomic operation,
787+ // so MMutex is held until this function returns for thread safety.
788+ std::lock_guard<std::mutex> Lock{MMutex};
789+
// Bound by reference: the assignments below update the selected LastEventPtr
// (default deps when MGraph has expired, external-graph deps otherwise).
790+ auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
791+ : MExtGraphDeps.LastEventPtr ;
792+
793+ // This dependency is needed for the following purposes:
794+ // - host tasks are handled by the runtime and cannot be implicitly
795+ // synchronized by the backend.
796+ // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
797+ // by a host task. This dependency allows the enqueue order to be built in
798+ // the RT but will not be passed to the backend. See getPIEvents in
799+ // Command.
800+ if (EventToBuildDeps) {
801+ // In the case where the last event was discarded and we are about to run
802+ // a host_task, we insert a barrier into the queue and use the resulting
803+ // event as the dependency for the host_task.
804+ // Note that host_task events can never be discarded, so this will not
805+ // insert barriers between host_task enqueues.
806+ if (EventToBuildDeps->isDiscarded () &&
807+ getSyclObjImpl (Handler)->MCGType == CGType::CodeplayHostTask)
808+ EventToBuildDeps = insertHelperBarrier (Handler);
809+
// Re-checked because the branch above may have replaced the last event.
810+ if (!EventToBuildDeps->isDiscarded ())
811+ Handler.depends_on (EventToBuildDeps);
812+ }
813+
814+ // If there is an external event set, add it as a dependency and clear it.
815+ // We do not need to hold the lock as MLastEventMtx will ensure the last
816+ // event reflects the corresponding external event dependence as well.
// NOTE(review): MMutex (taken at the top of this function) is still held
// here, and MLastEventMtx is not referenced in this function - confirm that
// the comment above is still accurate.
817+ std::optional<event> ExternalEvent = popExternalEvent ();
818+ if (ExternalEvent)
819+ Handler.depends_on (*ExternalEvent);
820+
// All dependencies must be registered before finalize(); afterwards the new
// event is written back through the reference as the last event.
821+ auto EventRet = Handler.finalize ();
822+ EventToBuildDeps = getSyclObjImpl (EventRet);
823+
824+ return EventRet;
825+ }
826+
// Finalizes Handler on the path the visible callers take when MIsInorder is
// not set. Before finalizing, it adds dependencies on tracked not-yet-enqueued
// command events (for barriers) and on the last barrier; after finalizing, it
// updates that tracking with the new event.
// Returns the event produced by Handler.finalize().
827+ template <typename HandlerType = handler>
828+ event finalizeHandlerOutOfOrder (HandlerType &Handler) {
// The command group type is read before MMutex is taken.
829+ const CGType Type = getSyclObjImpl (Handler)->MCGType ;
830+ std::lock_guard<std::mutex> Lock{MMutex};
831+ // The following code supports barrier synchronization if a host task is
832+ // involved in the scenario. Native barriers cannot handle a host task
833+ // dependency, so in the case where some commands were not enqueued
834+ // (blocked), we track them to prevent the barrier from being enqueued
835+ // earlier.
836+ {
// Process the pending cleanup requests under their own mutex, then drop them.
837+ std::lock_guard<std::mutex> RequestLock (MMissedCleanupRequestsMtx);
838+ for (auto &UpdatedGraph : MMissedCleanupRequests)
839+ doUnenqueuedCommandCleanup (UpdatedGraph);
840+ MMissedCleanupRequests.clear ();
841+ }
// Default deps when MGraph has expired, external-graph deps otherwise.
842+ auto &Deps = MGraph.expired () ? MDefaultGraphDeps : MExtGraphDeps;
// A barrier depends on every tracked command event that was not enqueued.
// NOTE(review): only CGType::Barrier is checked here, while the bookkeeping
// after finalize() treats Barrier and BarrierWaitlist alike - confirm that
// this difference is intentional.
843+ if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents .empty ()) {
844+ Handler.depends_on (Deps.UnenqueuedCmdEvents );
845+ }
// Depend on the last barrier when this is a host task, or when that barrier
// has not been enqueued yet.
846+ if (Deps.LastBarrier &&
847+ (Type == CGType::CodeplayHostTask || (!Deps.LastBarrier ->isEnqueued ())))
848+ Handler.depends_on (Deps.LastBarrier );
849+
850+ auto EventRet = Handler.finalize ();
851+ EventImplPtr EventRetImpl = getSyclObjImpl (EventRet);
// Bookkeeping for later submissions:
// - host task events are always tracked;
// - a barrier becomes the new LastBarrier and resets the tracked list;
// - any other command is tracked only if it was not enqueued.
852+ if (Type == CGType::CodeplayHostTask)
853+ Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
854+ else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
855+ Deps.LastBarrier = EventRetImpl;
856+ Deps.UnenqueuedCmdEvents .clear ();
857+ } else if (!EventRetImpl->isEnqueued ()) {
858+ Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
859+ }
860+
861+ return EventRet;
862+ }
863+
// Finalizes Handler via finalizeHandlerInOrder or finalizeHandlerOutOfOrder
// (selected by MIsInorder) and then invokes the post-process callback with
// (IsKernel, KernelUsesAssert, Event).
// PostProcessorFunc is dereferenced without a check; the caller visible in
// this file (finalizeHandler) only calls this function when it holds a value.
// Returns the event produced by finalization.
864+ template <typename HandlerType = handler>
865+ event finalizeHandlerPostProcess (
866+ HandlerType &Handler,
867+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
868+ auto HandlerImpl = detail::getSyclObjImpl (Handler);
869+ const CGType Type = HandlerImpl->MCGType ;
870+
// Both flags are computed from the handler before it is finalized.
871+ bool IsKernel = Type == CGType::Kernel;
872+ bool KernelUsesAssert = false ;
873+
874+ if (IsKernel)
875+ // Only a non-interop kernel is checked for assert usage.
876+ KernelUsesAssert = !(Handler.MKernel && Handler.MKernel ->isInterop ()) &&
877+ ProgramManager::getInstance ().kernelUsesAssert (
878+ Handler.MKernelName .c_str ());
879+
880+ auto Event = MIsInorder ? finalizeHandlerInOrder (Handler)
881+ : finalizeHandlerOutOfOrder (Handler);
882+
// No has-value check here: see the precondition in the header comment.
883+ auto &PostProcess = *PostProcessorFunc;
815884
816- // If there is an external event set, add it as a dependency and clear it.
817- // We do not need to hold the lock as MLastEventMtx will ensure the last
818- // event reflects the corresponding external event dependence as well.
819- std::optional<event> ExternalEvent = popExternalEvent ();
820- if (ExternalEvent)
821- Handler.depends_on (*ExternalEvent);
885+ PostProcess (IsKernel, KernelUsesAssert, Event);
822886
823- EventRet = Handler.finalize ();
824- EventToBuildDeps = getSyclObjImpl (EventRet);
887+ return Event;
888+ }
889+
// Entry point for finalizing Handler. When PostProcessorFunc holds a value,
// finalization goes through finalizeHandlerPostProcess; otherwise it goes
// straight to finalizeHandlerInOrder or finalizeHandlerOutOfOrder, selected
// by MIsInorder.
// Returns the event produced by finalization.
890+ // The template parameter is needed for proper unit testing.
891+ template <typename HandlerType = handler>
892+ event finalizeHandler (HandlerType &Handler,
893+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
894+ if (PostProcessorFunc) {
895+ return finalizeHandlerPostProcess (Handler, PostProcessorFunc);
825896 } else {
826- const CGType Type = getSyclObjImpl (Handler)->MCGType ;
827- std::lock_guard<std::mutex> Lock{MMutex};
828- // The following code supports barrier synchronization if host task is
829- // involved in the scenario. Native barriers cannot handle host task
830- // dependency so in the case where some commands were not enqueued
831- // (blocked), we track them to prevent barrier from being enqueued
832- // earlier.
833- {
834- std::lock_guard<std::mutex> RequestLock (MMissedCleanupRequestsMtx);
835- for (auto &UpdatedGraph : MMissedCleanupRequests)
836- doUnenqueuedCommandCleanup (UpdatedGraph);
837- MMissedCleanupRequests.clear ();
838- }
839- auto &Deps = MGraph.expired () ? MDefaultGraphDeps : MExtGraphDeps;
840- if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents .empty ()) {
841- Handler.depends_on (Deps.UnenqueuedCmdEvents );
842- }
843- if (Deps.LastBarrier && (Type == CGType::CodeplayHostTask ||
844- (!Deps.LastBarrier ->isEnqueued ())))
845- Handler.depends_on (Deps.LastBarrier );
846-
847- EventRet = Handler.finalize ();
848- EventImplPtr EventRetImpl = getSyclObjImpl (EventRet);
849- if (Type == CGType::CodeplayHostTask)
850- Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
851- else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
852- Deps.LastBarrier = EventRetImpl;
853- Deps.UnenqueuedCmdEvents .clear ();
854- } else if (!EventRetImpl->isEnqueued ()) {
855- Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
856- }
// No post-processing requested: finalize directly on the matching path.
897+ return MIsInorder ? finalizeHandlerInOrder (Handler)
898+ : finalizeHandlerOutOfOrder (Handler);
857899 }
858900 }
859901
0 commit comments