42
42
struct ur_exp_command_buffer_command_handle_t_ {
43
43
ur_exp_command_buffer_command_handle_t_ (
44
44
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
45
- std::shared_ptr< CUgraphNode> && Node, CUDA_KERNEL_NODE_PARAMS Params,
46
- uint32_t WorkDim , const size_t *GlobalWorkOffsetPtr ,
47
- const size_t *GlobalWorkSizePtr, const size_t * LocalWorkSizePtr);
45
+ CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim ,
46
+ const size_t *GlobalWorkOffsetPtr , const size_t *GlobalWorkSizePtr ,
47
+ const size_t *LocalWorkSizePtr);
48
48
49
49
void setGlobalOffset (const size_t *GlobalWorkOffsetPtr) {
50
50
const size_t CopySize = sizeof (size_t ) * WorkDim;
@@ -97,7 +97,7 @@ struct ur_exp_command_buffer_command_handle_t_ {
97
97
98
98
ur_exp_command_buffer_handle_t CommandBuffer;
99
99
ur_kernel_handle_t Kernel;
100
- std::shared_ptr< CUgraphNode> Node;
100
+ CUgraphNode Node;
101
101
CUDA_KERNEL_NODE_PARAMS Params;
102
102
103
103
uint32_t WorkDim;
@@ -118,8 +118,8 @@ struct ur_exp_command_buffer_handle_t_ {
118
118
~ur_exp_command_buffer_handle_t_ ();
119
119
120
120
// Stores CuNode in the SyncPoints map under the key SyncPoint, then
// increments NextSyncPoint.
// @param SyncPoint Key under which the node is stored
// @param CuNode Graph node to associate with SyncPoint
void registerSyncPoint (ur_exp_command_buffer_sync_point_t SyncPoint,
121
- std::shared_ptr< CUgraphNode> CuNode) {
122
- SyncPoints[SyncPoint] = std::move ( CuNode) ;
121
+ CUgraphNode CuNode) {
122
+ SyncPoints[SyncPoint] = CuNode;
123
123
NextSyncPoint++;
124
124
}
125
125
@@ -130,8 +130,7 @@ struct ur_exp_command_buffer_handle_t_ {
130
130
// Helper to register next sync point
131
131
// @param CuNode Node to register as next sync point
132
132
// @return The sync point under which the Node was registered
133
- ur_exp_command_buffer_sync_point_t
134
- addSyncPoint (std::shared_ptr<CUgraphNode> CuNode) {
133
+ ur_exp_command_buffer_sync_point_t addSyncPoint (CUgraphNode CuNode) {
135
134
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
136
135
registerSyncPoint (SyncPoint, std::move (CuNode));
137
136
return SyncPoint;
@@ -173,8 +172,7 @@ struct ur_exp_command_buffer_handle_t_ {
173
172
std::atomic_uint32_t RefCountExternal;
174
173
175
174
// Map of sync_points to CUDA graph nodes
176
- std::unordered_map<ur_exp_command_buffer_sync_point_t ,
177
- std::shared_ptr<CUgraphNode>>
175
+ std::unordered_map<ur_exp_command_buffer_sync_point_t , CUgraphNode>
178
176
SyncPoints;
179
177
// Next sync_point value (may need to consider ways to reuse values if 32-bits
180
178
// is not enough)
0 commit comments