Skip to content

Commit a67445c

Browse files
authored
[SYCL][Graph] Remove UR sync point tracking in linear graphs (#20291)
When the command buffer in a partition is created with property `isInOrder`, explicit sync point dependencies are ignored by UR. We currently set this property when the graph is linear but still create and store sync points. We should avoid tracking sync points in this case as it adds unnecessary overhead to graph finalization.
1 parent 6276f1b commit a67445c

File tree

2 files changed

+64
-26
lines changed

2 files changed

+64
-26
lines changed

sycl/source/detail/graph/graph_impl.cpp

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -724,12 +724,16 @@ void exec_graph_impl::findRealDeps(
724724
}
725725
}
726726

727-
ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
728-
const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl,
729-
ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node) {
727+
std::optional<ur_exp_command_buffer_sync_point_t>
728+
exec_graph_impl::enqueueNodeDirect(const sycl::context &Ctx,
729+
sycl::detail::device_impl &DeviceImpl,
730+
ur_exp_command_buffer_handle_t CommandBuffer,
731+
node_impl &Node, bool IsInOrderPartition) {
730732
std::vector<ur_exp_command_buffer_sync_point_t> Deps;
731-
for (node_impl &N : Node.predecessors()) {
732-
findRealDeps(Deps, N, MPartitionNodes[&Node]);
733+
if (!IsInOrderPartition) {
734+
for (node_impl &N : Node.predecessors()) {
735+
findRealDeps(Deps, N, MPartitionNodes[&Node]);
736+
}
733737
}
734738
ur_exp_command_buffer_sync_point_t NewSyncPoint;
735739
ur_exp_command_buffer_command_handle_t NewCommand = 0;
@@ -758,7 +762,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
758762
ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel(
759763
Ctx, DeviceImpl, CommandBuffer,
760764
*static_cast<sycl::detail::CGExecKernel *>((Node.MCommandGroup.get())),
761-
Deps, &NewSyncPoint, MIsUpdatable ? &NewCommand : nullptr, nullptr);
765+
Deps, IsInOrderPartition ? nullptr : &NewSyncPoint,
766+
MIsUpdatable ? &NewCommand : nullptr, nullptr);
762767

763768
if (MIsUpdatable) {
764769
MCommandMap[&Node] = NewCommand;
@@ -775,16 +780,21 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
775780
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr);
776781
#endif
777782

778-
return NewSyncPoint;
783+
// Linear (in-order) graphs do not return a sync point as the dependencies of
784+
// successor nodes are handled by the UR CommandBuffer via the isInOrder flag.
785+
return IsInOrderPartition
786+
? std::nullopt
787+
: std::optional<ur_exp_command_buffer_sync_point_t>{NewSyncPoint};
779788
}
780789

781-
ur_exp_command_buffer_sync_point_t
790+
std::optional<ur_exp_command_buffer_sync_point_t>
782791
exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
783-
node_impl &Node) {
784-
792+
node_impl &Node, bool IsInOrderPartition) {
785793
std::vector<ur_exp_command_buffer_sync_point_t> Deps;
786-
for (node_impl &N : Node.predecessors()) {
787-
findRealDeps(Deps, N, MPartitionNodes[&Node]);
794+
if (!IsInOrderPartition) {
795+
for (node_impl &N : Node.predecessors()) {
796+
findRealDeps(Deps, N, MPartitionNodes[&Node]);
797+
}
788798
}
789799

790800
sycl::detail::EventImplPtr Event =
@@ -796,7 +806,11 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
796806
MCommandMap[&Node] = Event->getCommandBufferCommand();
797807
}
798808

799-
return Event->getSyncPoint();
809+
// Linear (in-order) graphs do not return a sync point as the dependencies of
810+
// successor nodes are handled by the UR CommandBuffer via the isInOrder flag.
811+
return IsInOrderPartition ? std::nullopt
812+
: std::optional<ur_exp_command_buffer_sync_point_t>{
813+
Event->getSyncPoint()};
800814
}
801815

802816
void exec_graph_impl::buildRequirements() {
@@ -825,10 +839,12 @@ void exec_graph_impl::buildRequirements() {
825839

826840
void exec_graph_impl::createCommandBuffers(
827841
sycl::device Device, std::shared_ptr<partition> &Partition) {
842+
const bool IsInOrderCommandBuffer =
843+
Partition->MIsInOrderGraph && !MEnableProfiling;
828844
ur_exp_command_buffer_handle_t OutCommandBuffer;
829-
ur_exp_command_buffer_desc_t Desc{
830-
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, MIsUpdatable,
831-
Partition->MIsInOrderGraph && !MEnableProfiling, MEnableProfiling};
845+
ur_exp_command_buffer_desc_t Desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC,
846+
nullptr, MIsUpdatable,
847+
IsInOrderCommandBuffer, MEnableProfiling};
832848
context_impl &ContextImpl = *sycl::detail::getSyclObjImpl(MContext);
833849
sycl::detail::adapter_impl &Adapter = ContextImpl.getAdapter();
834850
sycl::detail::device_impl &DeviceImpl = *sycl::detail::getSyclObjImpl(Device);
@@ -857,10 +873,20 @@ void exec_graph_impl::createCommandBuffers(
857873
Node.MCommandGroup.get())
858874
->MStreams.size() ==
859875
0) {
860-
MSyncPoints[&Node] =
861-
enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node);
876+
if (auto OptSyncPoint =
877+
enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node,
878+
IsInOrderCommandBuffer)) {
879+
assert(!IsInOrderCommandBuffer &&
880+
"In-order partitions should not create a sync point");
881+
MSyncPoints[&Node] = *OptSyncPoint;
882+
}
862883
} else {
863-
MSyncPoints[&Node] = enqueueNode(OutCommandBuffer, Node);
884+
if (auto OptSyncPoint =
885+
enqueueNode(OutCommandBuffer, Node, IsInOrderCommandBuffer)) {
886+
assert(!IsInOrderCommandBuffer &&
887+
"In-order partitions should not create a sync point");
888+
MSyncPoints[&Node] = *OptSyncPoint;
889+
}
864890
}
865891
}
866892

sycl/source/detail/graph/graph_impl.hpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <functional> // for function
1919
#include <list> // for list
2020
#include <memory> // for shared_ptr
21+
#include <optional> // for optional
2122
#include <set> // for set
2223
#include <shared_mutex> // for shared_mutex
2324
#include <vector> // for vector
@@ -735,20 +736,31 @@ class exec_graph_impl {
735736
/// through the scheduler.
736737
/// @param CommandBuffer Command-buffer to add node to as a command.
737738
/// @param Node The node being enqueued.
738-
/// @return UR sync point created for this node in the command-buffer.
739-
ur_exp_command_buffer_sync_point_t
740-
enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node);
739+
/// @param IsInOrderPartition True if the partition associated with the node
740+
/// is a linear (in-order) graph and profiling is disabled.
741+
/// @return Optional UR sync point created for this node in the
742+
/// command-buffer. std::nullopt is returned only if the associated partition
743+
/// of the node is linear and profiling is disabled.
744+
std::optional<ur_exp_command_buffer_sync_point_t>
745+
enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node,
746+
bool IsInOrderPartition);
741747

742748
/// Enqueue a node directly to the command-buffer without going through the
743749
/// scheduler.
744750
/// @param Ctx Context to use.
745751
/// @param DeviceImpl Device associated with the enqueue.
746752
/// @param CommandBuffer Command-buffer to add node to as a command.
747753
/// @param Node The node being enqueued.
748-
/// @return UR sync point created for this node in the command-buffer.
749-
ur_exp_command_buffer_sync_point_t enqueueNodeDirect(
750-
const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl,
751-
ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node);
754+
/// @param IsInOrderPartition True if the partition associated with the node
755+
/// is a linear (in-order) graph and profiling is disabled.
756+
/// @return Optional UR sync point created for this node in the
757+
/// command-buffer. std::nullopt is returned only if the associated partition
758+
/// of the node is linear and profiling is disabled.
759+
std::optional<ur_exp_command_buffer_sync_point_t>
760+
enqueueNodeDirect(const sycl::context &Ctx,
761+
sycl::detail::device_impl &DeviceImpl,
762+
ur_exp_command_buffer_handle_t CommandBuffer,
763+
node_impl &Node, bool IsInOrderPartition);
752764

753765
/// Enqueues a host-task partition (i.e. a partition that contains only a
754766
/// single node and that node is a host-task).

0 commit comments

Comments
 (0)