@@ -121,7 +121,7 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
 // Copy destination pointers to GPU to be available when pointer indirection is in use
 
 void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
         CUDA_CHECK(cudaStreamSynchronize(stream));
         if (cuda_graph->dest_ptrs_d != nullptr) {
@@ -314,7 +314,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 
     char ** dest_ptrs_d = nullptr;
     int graph_cpynode_index = -1;
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         dest_ptrs_d = ctx.cuda_graph->dest_ptrs_d;
         graph_cpynode_index = ctx.cuda_graph->graph_cpynode_index;
@@ -379,7 +379,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                    ggml_type_name(src0->type), ggml_type_name(src1->type));
     }
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         ctx.cuda_graph->graph_cpynode_index = graph_cpynode_index;
     }
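
The change only widens the existing preprocessor guards so that the destination-pointer indirection path used for graph capture is also compiled when MUSA graph support (GGML_MUSA_GRAPHS) is enabled. Below is a minimal, self-contained sketch of that guard pattern, assuming nothing beyond what the diff shows; it is not the ggml sources, and graph_state / upload_dest_ptrs are hypothetical stand-ins for ggml_cuda_graph / ggml_cuda_cpy_dest_ptrs_copy.

// Sketch of the guard pattern: the indirection state exists only when one of
// the graph backends is compiled in, now including GGML_MUSA_GRAPHS.
struct graph_state {                       // stand-in for ggml_cuda_graph
#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
    char ** dest_ptrs_d    = nullptr;      // device buffer of destination pointers
    int     dest_ptrs_size = 0;            // current capacity of that buffer
#endif
};

// Stand-in for ggml_cuda_cpy_dest_ptrs_copy: records destination pointers only
// when a graph backend is compiled in; otherwise the call is a no-op.
static void upload_dest_ptrs(graph_state & g, char ** host_ptrs, int n) {
#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
    (void) host_ptrs;
    if (g.dest_ptrs_size < n) {
        g.dest_ptrs_size = n;              // the real code (re-)allocates and copies to the GPU here
    }
#else
    (void) g; (void) host_ptrs; (void) n;  // indirection disabled at compile time
#endif
}

int main() {
    graph_state g;
    char * dummy = nullptr;
    upload_dest_ptrs(g, &dummy, 1);
    return 0;
}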