@@ -724,12 +724,16 @@ void exec_graph_impl::findRealDeps(
724724 }
725725}
726726
727- ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect (
728- const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl,
729- ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node) {
727+ std::optional<ur_exp_command_buffer_sync_point_t >
728+ exec_graph_impl::enqueueNodeDirect (const sycl::context &Ctx,
729+ sycl::detail::device_impl &DeviceImpl,
730+ ur_exp_command_buffer_handle_t CommandBuffer,
731+ node_impl &Node, bool IsInOrderPartition) {
730732 std::vector<ur_exp_command_buffer_sync_point_t > Deps;
731- for (node_impl &N : Node.predecessors ()) {
732- findRealDeps (Deps, N, MPartitionNodes[&Node]);
733+ if (!IsInOrderPartition) {
734+ for (node_impl &N : Node.predecessors ()) {
735+ findRealDeps (Deps, N, MPartitionNodes[&Node]);
736+ }
733737 }
734738 ur_exp_command_buffer_sync_point_t NewSyncPoint;
735739 ur_exp_command_buffer_command_handle_t NewCommand = 0 ;
@@ -758,7 +762,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
758762 ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel (
759763 Ctx, DeviceImpl, CommandBuffer,
760764 *static_cast <sycl::detail::CGExecKernel *>((Node.MCommandGroup .get ())),
761- Deps, &NewSyncPoint, MIsUpdatable ? &NewCommand : nullptr , nullptr );
765+ Deps, IsInOrderPartition ? nullptr : &NewSyncPoint,
766+ MIsUpdatable ? &NewCommand : nullptr , nullptr );
762767
763768 if (MIsUpdatable) {
764769 MCommandMap[&Node] = NewCommand;
@@ -775,16 +780,21 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
775780 StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr );
776781#endif
777782
778- return NewSyncPoint;
783+ // Linear (in-order) graphs do not return a sync point as the dependencies of
784+ // successor nodes are handled by the UR CommandBuffer via the isInOrder flag
785+ return IsInOrderPartition
786+ ? std::nullopt
787+ : std::optional<ur_exp_command_buffer_sync_point_t >{NewSyncPoint};
779788}
780789
781- ur_exp_command_buffer_sync_point_t
790+ std::optional< ur_exp_command_buffer_sync_point_t >
782791exec_graph_impl::enqueueNode (ur_exp_command_buffer_handle_t CommandBuffer,
783- node_impl &Node) {
784-
792+ node_impl &Node, bool IsInOrderPartition) {
785793 std::vector<ur_exp_command_buffer_sync_point_t > Deps;
786- for (node_impl &N : Node.predecessors ()) {
787- findRealDeps (Deps, N, MPartitionNodes[&Node]);
794+ if (!IsInOrderPartition) {
795+ for (node_impl &N : Node.predecessors ()) {
796+ findRealDeps (Deps, N, MPartitionNodes[&Node]);
797+ }
788798 }
789799
790800 sycl::detail::EventImplPtr Event =
@@ -796,7 +806,11 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
796806 MCommandMap[&Node] = Event->getCommandBufferCommand ();
797807 }
798808
799- return Event->getSyncPoint ();
809+ // Linear (in-order) graphs do not return a sync point as the dependencies of
810+ // successor nodes are handled by the UR CommandBuffer via the isInOrder flag
811+ return IsInOrderPartition ? std::nullopt
812+ : std::optional<ur_exp_command_buffer_sync_point_t >{
813+ Event->getSyncPoint ()};
800814}
801815
802816void exec_graph_impl::buildRequirements () {
@@ -825,10 +839,12 @@ void exec_graph_impl::buildRequirements() {
825839
826840void exec_graph_impl::createCommandBuffers (
827841 sycl::device Device, std::shared_ptr<partition> &Partition) {
842+ const bool IsInOrderCommandBuffer =
843+ Partition->MIsInOrderGraph && !MEnableProfiling;
828844 ur_exp_command_buffer_handle_t OutCommandBuffer;
829- ur_exp_command_buffer_desc_t Desc{
830- UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr , MIsUpdatable,
831- Partition-> MIsInOrderGraph && !MEnableProfiling , MEnableProfiling};
845+ ur_exp_command_buffer_desc_t Desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC,
846+ nullptr , MIsUpdatable,
847+ IsInOrderCommandBuffer , MEnableProfiling};
832848 context_impl &ContextImpl = *sycl::detail::getSyclObjImpl (MContext);
833849 sycl::detail::adapter_impl &Adapter = ContextImpl.getAdapter ();
834850 sycl::detail::device_impl &DeviceImpl = *sycl::detail::getSyclObjImpl (Device);
@@ -857,10 +873,20 @@ void exec_graph_impl::createCommandBuffers(
857873 Node.MCommandGroup .get ())
858874 ->MStreams .size () ==
859875 0 ) {
860- MSyncPoints[&Node] =
861- enqueueNodeDirect (MContext, DeviceImpl, OutCommandBuffer, Node);
876+ if (auto OptSyncPoint =
877+ enqueueNodeDirect (MContext, DeviceImpl, OutCommandBuffer, Node,
878+ IsInOrderCommandBuffer)) {
879+ assert (!IsInOrderCommandBuffer &&
880+ " In-order partitions should not create a sync point" );
881+ MSyncPoints[&Node] = *OptSyncPoint;
882+ }
862883 } else {
863- MSyncPoints[&Node] = enqueueNode (OutCommandBuffer, Node);
884+ if (auto OptSyncPoint =
885+ enqueueNode (OutCommandBuffer, Node, IsInOrderCommandBuffer)) {
886+ assert (!IsInOrderCommandBuffer &&
887+ " In-order partitions should not create a sync point" );
888+ MSyncPoints[&Node] = *OptSyncPoint;
889+ }
864890 }
865891 }
866892
0 commit comments