Skip to content

Commit 6d7df91

Browse files
committed
properly sync to stream
1 parent a3d1318 commit 6d7df91

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

ggml/src/ggml-cuda/cpy.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,15 +347,16 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
347347

348348
// Copy destination pointers to GPU to be available when pointer indirection is in use
349349

350-
void ggml_backend_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size) {
350+
void ggml_backend_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
351351
#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
352352
if(cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
353353
if (cuda_graph->dest_ptrs_d != nullptr) cudaFree(cuda_graph->dest_ptrs_d);
354+
cudaStreamSynchronize(stream);
354355
cudaMalloc(&cuda_graph->dest_ptrs_d, host_dest_ptrs_size*sizeof(char *));
355356
cuda_graph->dest_ptrs_size = host_dest_ptrs_size;
356357
}
357358
// copy destination pointers to GPU
358-
cudaMemcpy(cuda_graph->dest_ptrs_d, host_dest_ptrs, host_dest_ptrs_size*sizeof(char *), cudaMemcpyHostToDevice);
359+
cudaMemcpyAsync(cuda_graph->dest_ptrs_d, host_dest_ptrs, host_dest_ptrs_size*sizeof(char *), cudaMemcpyHostToDevice, stream);
359360
cuda_graph->graph_cpynode_index = 0; // reset index
360361
#endif
361362
}

ggml/src/ggml-cuda/cpy.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
88

99
void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);
1010

11-
void ggml_backend_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size);
11+
void ggml_backend_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream);

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2486,7 +2486,7 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
24862486
if(use_cuda_graph)
24872487
{
24882488
// copy pointers to GPU so they can be accessed via indirection within CUDA graph
2489-
ggml_backend_dest_ptrs_copy(cuda_ctx->cuda_graph.get(), cuda_ctx->cuda_graph->cpy_dest_ptrs.data(), cuda_ctx->cuda_graph->cpy_dest_ptrs.size());
2489+
ggml_backend_dest_ptrs_copy(cuda_ctx->cuda_graph.get(), cuda_ctx->cuda_graph->cpy_dest_ptrs.data(), cuda_ctx->cuda_graph->cpy_dest_ptrs.size(), cuda_ctx->stream());
24902490
}
24912491

24922492
return use_cuda_graph;

0 commit comments

Comments
 (0)