diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 07e8994df57f6..f77e3a8e7adf3 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -717,12 +717,16 @@ void exec_graph_impl::findRealDeps( } } -ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( - const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl, - ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node) { +std::optional +exec_graph_impl::enqueueNodeDirect(const sycl::context &Ctx, + sycl::detail::device_impl &DeviceImpl, + ur_exp_command_buffer_handle_t CommandBuffer, + node_impl &Node, bool IsInOrderPartition) { std::vector Deps; - for (node_impl &N : Node.predecessors()) { - findRealDeps(Deps, N, MPartitionNodes[&Node]); + if (!IsInOrderPartition) { + for (node_impl &N : Node.predecessors()) { + findRealDeps(Deps, N, MPartitionNodes[&Node]); + } } ur_exp_command_buffer_sync_point_t NewSyncPoint; ur_exp_command_buffer_command_handle_t NewCommand = 0; @@ -751,7 +755,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel( Ctx, DeviceImpl, CommandBuffer, *static_cast((Node.MCommandGroup.get())), - Deps, &NewSyncPoint, MIsUpdatable ? &NewCommand : nullptr, nullptr); + Deps, IsInOrderPartition ? nullptr : &NewSyncPoint, + MIsUpdatable ? &NewCommand : nullptr, nullptr); if (MIsUpdatable) { MCommandMap[&Node] = NewCommand; @@ -768,16 +773,21 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr); #endif - return NewSyncPoint; + // Linear (in-order) graphs do not return a sync point as the dependencies of + // successor nodes are handled by the UR CommandBuffer via the isInOrder flag + return IsInOrderPartition + ? std::nullopt + : std::optional{NewSyncPoint}; } -ur_exp_command_buffer_sync_point_t +std::optional exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, - node_impl &Node) { - + node_impl &Node, bool IsInOrderPartition) { std::vector Deps; - for (node_impl &N : Node.predecessors()) { - findRealDeps(Deps, N, MPartitionNodes[&Node]); + if (!IsInOrderPartition) { + for (node_impl &N : Node.predecessors()) { + findRealDeps(Deps, N, MPartitionNodes[&Node]); + } } sycl::detail::EventImplPtr Event = @@ -789,7 +799,11 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, MCommandMap[&Node] = Event->getCommandBufferCommand(); } - return Event->getSyncPoint(); + // Linear (in-order) graphs do not return a sync point as the dependencies of + // successor nodes are handled by the UR CommandBuffer via the isInOrder flag + return IsInOrderPartition ? std::nullopt + : std::optional{ + Event->getSyncPoint()}; } void exec_graph_impl::buildRequirements() { @@ -818,10 +832,12 @@ void exec_graph_impl::buildRequirements() { void exec_graph_impl::createCommandBuffers( sycl::device Device, std::shared_ptr &Partition) { + const bool IsInOrderCommandBuffer = + Partition->MIsInOrderGraph && !MEnableProfiling; ur_exp_command_buffer_handle_t OutCommandBuffer; - ur_exp_command_buffer_desc_t Desc{ - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, MIsUpdatable, - Partition->MIsInOrderGraph && !MEnableProfiling, MEnableProfiling}; + ur_exp_command_buffer_desc_t Desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, + nullptr, MIsUpdatable, + IsInOrderCommandBuffer, MEnableProfiling}; context_impl &ContextImpl = *sycl::detail::getSyclObjImpl(MContext); sycl::detail::adapter_impl &Adapter = ContextImpl.getAdapter(); sycl::detail::device_impl &DeviceImpl = *sycl::detail::getSyclObjImpl(Device); @@ -850,10 +866,20 @@ void exec_graph_impl::createCommandBuffers( Node.MCommandGroup.get()) ->MStreams.size() == 0) { - MSyncPoints[&Node] = - enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node); + if (auto OptSyncPoint = + enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node, + IsInOrderCommandBuffer)) { + assert(!IsInOrderCommandBuffer && + "In-order partitions should not create a sync point"); + MSyncPoints[&Node] = *OptSyncPoint; + } } else { - MSyncPoints[&Node] = enqueueNode(OutCommandBuffer, Node); + if (auto OptSyncPoint = + enqueueNode(OutCommandBuffer, Node, IsInOrderCommandBuffer)) { + assert(!IsInOrderCommandBuffer && + "In-order partitions should not create a sync point"); + MSyncPoints[&Node] = *OptSyncPoint; + } } } diff --git a/sycl/source/detail/graph/graph_impl.hpp b/sycl/source/detail/graph/graph_impl.hpp index eedfcf0506bf3..df77320897848 100644 --- a/sycl/source/detail/graph/graph_impl.hpp +++ b/sycl/source/detail/graph/graph_impl.hpp @@ -18,6 +18,7 @@ #include // for function #include // for list #include // for shared_ptr +#include // for optional #include // for set #include // for shared_mutex #include // for vector @@ -733,9 +734,14 @@ class exec_graph_impl { /// through the scheduler. /// @param CommandBuffer Command-buffer to add node to as a command. /// @param Node The node being enqueued. - /// @return UR sync point created for this node in the command-buffer. - ur_exp_command_buffer_sync_point_t - enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node); + /// @param IsInOrderPartition True if the partition associated with the node + /// is a linear (in-order) graph. + /// @return Optional UR sync point created for this node in the + /// command-buffer. std::nullopt is returned only if the associated partition + /// of the node is linear. + std::optional + enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node, + bool IsInOrderPartition); /// Enqueue a node directly to the command-buffer without going through the /// scheduler. @@ -743,10 +749,16 @@ class exec_graph_impl { /// @param DeviceImpl Device associated with the enqueue. /// @param CommandBuffer Command-buffer to add node to as a command. /// @param Node The node being enqueued. - /// @return UR sync point created for this node in the command-buffer. - ur_exp_command_buffer_sync_point_t enqueueNodeDirect( - const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl, - ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node); + /// @param IsInOrderPartition True if the partition associated with the node + /// is a linear (in-order) graph. + /// @return Optional UR sync point created for this node in the + /// command-buffer. std::nullopt is returned only if the associated partition + /// of the node is linear. + std::optional + enqueueNodeDirect(const sycl::context &Ctx, + sycl::detail::device_impl &DeviceImpl, + ur_exp_command_buffer_handle_t CommandBuffer, + node_impl &Node, bool IsInOrderPartition); /// Enqueues a host-task partition (i.e. a partition that contains only a /// single node and that node is a host-task).