Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 45 additions & 19 deletions sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,12 +717,16 @@ void exec_graph_impl::findRealDeps(
}
}

ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl,
ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node) {
std::optional<ur_exp_command_buffer_sync_point_t>
exec_graph_impl::enqueueNodeDirect(const sycl::context &Ctx,
sycl::detail::device_impl &DeviceImpl,
ur_exp_command_buffer_handle_t CommandBuffer,
node_impl &Node, bool IsInOrderPartition) {
std::vector<ur_exp_command_buffer_sync_point_t> Deps;
for (node_impl &N : Node.predecessors()) {
findRealDeps(Deps, N, MPartitionNodes[&Node]);
if (!IsInOrderPartition) {
for (node_impl &N : Node.predecessors()) {
findRealDeps(Deps, N, MPartitionNodes[&Node]);
}
}
ur_exp_command_buffer_sync_point_t NewSyncPoint;
ur_exp_command_buffer_command_handle_t NewCommand = 0;
Expand Down Expand Up @@ -751,7 +755,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel(
Ctx, DeviceImpl, CommandBuffer,
*static_cast<sycl::detail::CGExecKernel *>((Node.MCommandGroup.get())),
Deps, &NewSyncPoint, MIsUpdatable ? &NewCommand : nullptr, nullptr);
Deps, IsInOrderPartition ? nullptr : &NewSyncPoint,
MIsUpdatable ? &NewCommand : nullptr, nullptr);

if (MIsUpdatable) {
MCommandMap[&Node] = NewCommand;
Expand All @@ -768,16 +773,21 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr);
#endif

return NewSyncPoint;
// Linear (in-order) graphs do not return a sync point as the dependencies of
// successor nodes are handled by the UR CommandBuffer via the isInOrder flag
return IsInOrderPartition
? std::nullopt
: std::optional<ur_exp_command_buffer_sync_point_t>{NewSyncPoint};
}

ur_exp_command_buffer_sync_point_t
std::optional<ur_exp_command_buffer_sync_point_t>
exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
node_impl &Node) {

node_impl &Node, bool IsInOrderPartition) {
std::vector<ur_exp_command_buffer_sync_point_t> Deps;
for (node_impl &N : Node.predecessors()) {
findRealDeps(Deps, N, MPartitionNodes[&Node]);
if (!IsInOrderPartition) {
for (node_impl &N : Node.predecessors()) {
findRealDeps(Deps, N, MPartitionNodes[&Node]);
}
}

sycl::detail::EventImplPtr Event =
Expand All @@ -789,7 +799,11 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
MCommandMap[&Node] = Event->getCommandBufferCommand();
}

return Event->getSyncPoint();
// Linear (in-order) graphs do not return a sync point as the dependencies of
// successor nodes are handled by the UR CommandBuffer via the isInOrder flag
return IsInOrderPartition ? std::nullopt
: std::optional<ur_exp_command_buffer_sync_point_t>{
Event->getSyncPoint()};
}

void exec_graph_impl::buildRequirements() {
Expand Down Expand Up @@ -818,10 +832,12 @@ void exec_graph_impl::buildRequirements() {

void exec_graph_impl::createCommandBuffers(
sycl::device Device, std::shared_ptr<partition> &Partition) {
const bool IsInOrderCommandBuffer =
Partition->MIsInOrderGraph && !MEnableProfiling;
ur_exp_command_buffer_handle_t OutCommandBuffer;
ur_exp_command_buffer_desc_t Desc{
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, MIsUpdatable,
Partition->MIsInOrderGraph && !MEnableProfiling, MEnableProfiling};
ur_exp_command_buffer_desc_t Desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC,
nullptr, MIsUpdatable,
IsInOrderCommandBuffer, MEnableProfiling};
context_impl &ContextImpl = *sycl::detail::getSyclObjImpl(MContext);
sycl::detail::adapter_impl &Adapter = ContextImpl.getAdapter();
sycl::detail::device_impl &DeviceImpl = *sycl::detail::getSyclObjImpl(Device);
Expand Down Expand Up @@ -850,10 +866,20 @@ void exec_graph_impl::createCommandBuffers(
Node.MCommandGroup.get())
->MStreams.size() ==
0) {
MSyncPoints[&Node] =
enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node);
if (auto OptSyncPoint =
enqueueNodeDirect(MContext, DeviceImpl, OutCommandBuffer, Node,
IsInOrderCommandBuffer)) {
assert(!IsInOrderCommandBuffer &&
"In-order partitions should not create a sync point");
MSyncPoints[&Node] = *OptSyncPoint;
}
} else {
MSyncPoints[&Node] = enqueueNode(OutCommandBuffer, Node);
if (auto OptSyncPoint =
enqueueNode(OutCommandBuffer, Node, IsInOrderCommandBuffer)) {
assert(!IsInOrderCommandBuffer &&
"In-order partitions should not create a sync point");
MSyncPoints[&Node] = *OptSyncPoint;
}
}
}

Expand Down
26 changes: 19 additions & 7 deletions sycl/source/detail/graph/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <functional> // for function
#include <list> // for list
#include <memory> // for shared_ptr
#include <optional> // for optional
#include <set> // for set
#include <shared_mutex> // for shared_mutex
#include <vector> // for vector
Expand Down Expand Up @@ -733,20 +734,31 @@ class exec_graph_impl {
/// through the scheduler.
/// @param CommandBuffer Command-buffer to add node to as a command.
/// @param Node The node being enqueued.
/// @return UR sync point created for this node in the command-buffer.
ur_exp_command_buffer_sync_point_t
enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node);
/// @param IsInOrderPartition True if the partition associated with the node
/// is a linear (in-order) graph.
/// @return Optional UR sync point created for this node in the
/// command-buffer. std::nullopt is returned only if the associated partition
/// of the node is linear.
std::optional<ur_exp_command_buffer_sync_point_t>
enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node,
bool IsInOrderPartition);

/// Enqueue a node directly to the command-buffer without going through the
/// scheduler.
/// @param Ctx Context to use.
/// @param DeviceImpl Device associated with the enqueue.
/// @param CommandBuffer Command-buffer to add node to as a command.
/// @param Node The node being enqueued.
/// @return UR sync point created for this node in the command-buffer.
ur_exp_command_buffer_sync_point_t enqueueNodeDirect(
const sycl::context &Ctx, sycl::detail::device_impl &DeviceImpl,
ur_exp_command_buffer_handle_t CommandBuffer, node_impl &Node);
/// @param IsInOrderPartition True if the partition associated with the node
/// is a linear (in-order) graph.
/// @return Optional UR sync point created for this node in the
/// command-buffer. std::nullopt is returned only if the associated partition
/// of the node is linear.
std::optional<ur_exp_command_buffer_sync_point_t>
enqueueNodeDirect(const sycl::context &Ctx,
sycl::detail::device_impl &DeviceImpl,
ur_exp_command_buffer_handle_t CommandBuffer,
node_impl &Node, bool IsInOrderPartition);

/// Enqueues a host-task partition (i.e. a partition that contains only a
/// single node and that node is a host-task).
Expand Down
Loading