Skip to content

Commit 3c12bbc

Browse files
Merge pull request #1910 from Bensuo/sync_point
[CUDA][HIP] Improve command-buffer sync points
2 parents 83f7ad9 + ac7eb17 commit 3c12bbc

12 files changed

+642
-409
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 117 additions & 179 deletions
Large diffs are not rendered by default.

source/adapters/cuda/command_buffer.hpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@
4242
struct ur_exp_command_buffer_command_handle_t_ {
4343
ur_exp_command_buffer_command_handle_t_(
4444
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
45-
std::shared_ptr<CUgraphNode> &&Node, CUDA_KERNEL_NODE_PARAMS Params,
46-
uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr,
47-
const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr);
45+
CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim,
46+
const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr,
47+
const size_t *LocalWorkSizePtr);
4848

4949
void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) {
5050
const size_t CopySize = sizeof(size_t) * WorkDim;
@@ -97,7 +97,7 @@ struct ur_exp_command_buffer_command_handle_t_ {
9797

9898
ur_exp_command_buffer_handle_t CommandBuffer;
9999
ur_kernel_handle_t Kernel;
100-
std::shared_ptr<CUgraphNode> Node;
100+
CUgraphNode Node;
101101
CUDA_KERNEL_NODE_PARAMS Params;
102102

103103
uint32_t WorkDim;
@@ -118,8 +118,8 @@ struct ur_exp_command_buffer_handle_t_ {
118118
~ur_exp_command_buffer_handle_t_();
119119

120120
void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
121-
std::shared_ptr<CUgraphNode> CuNode) {
122-
SyncPoints[SyncPoint] = std::move(CuNode);
121+
CUgraphNode CuNode) {
122+
SyncPoints[SyncPoint] = CuNode;
123123
NextSyncPoint++;
124124
}
125125

@@ -130,8 +130,7 @@ struct ur_exp_command_buffer_handle_t_ {
130130
// Helper to register next sync point
131131
// @param CuNode Node to register as next sync point
132132
// @return Sync point under which the Node was registered
133-
ur_exp_command_buffer_sync_point_t
134-
addSyncPoint(std::shared_ptr<CUgraphNode> CuNode) {
133+
ur_exp_command_buffer_sync_point_t addSyncPoint(CUgraphNode CuNode) {
135134
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
136135
registerSyncPoint(SyncPoint, std::move(CuNode));
137136
return SyncPoint;
@@ -173,8 +172,7 @@ struct ur_exp_command_buffer_handle_t_ {
173172
std::atomic_uint32_t RefCountExternal;
174173

175174
// Map of sync_points to CUDA graph nodes
176-
std::unordered_map<ur_exp_command_buffer_sync_point_t,
177-
std::shared_ptr<CUgraphNode>>
175+
std::unordered_map<ur_exp_command_buffer_sync_point_t, CUgraphNode>
178176
SyncPoints;
179177
// Next sync_point value (may need to consider ways to reuse values if 32-bits
180178
// is not enough)

0 commit comments

Comments
 (0)