@@ -358,7 +358,7 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
 // Copy destination pointers to GPU to be available when pointer indirection is in use

 void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
         CUDA_CHECK(cudaStreamSynchronize(stream));
         if (cuda_graph->dest_ptrs_d != nullptr) {
@@ -590,7 +590,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg

     char ** dest_ptrs_d = nullptr;
     int graph_cpynode_index = -1;
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         dest_ptrs_d = ctx.cuda_graph->dest_ptrs_d;
         graph_cpynode_index = ctx.cuda_graph->graph_cpynode_index;
@@ -647,7 +647,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
     }
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         ctx.cuda_graph->graph_cpynode_index = graph_cpynode_index;
     }
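
All three hunks extend the same preprocessor condition that gates the graph-related bookkeeping. As a minimal sketch only (not part of the patch), the repeated backend check could be collapsed into one helper macro; the name GGML_CUDA_GRAPHS_ENABLED below is hypothetical and just illustrates the guard pattern the call sites share:

```cpp
// Hypothetical consolidation of the guard used at the three sites above.
#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
#   define GGML_CUDA_GRAPHS_ENABLED 1
#else
#   define GGML_CUDA_GRAPHS_ENABLED 0
#endif

// Call sites would then test a single flag instead of repeating the defined() checks:
#if GGML_CUDA_GRAPHS_ENABLED
    // graph-related bookkeeping, e.g. reading/storing dest_ptrs_d and graph_cpynode_index
#endif
```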