11#include " cpy.cuh"
22#include " dequantize.cuh"
33#include " cpy-utils.cuh"
4- #ifdef GGML_USE_MUSA
4+ #if defined( GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
55#include " ggml-musa/mudnn.cuh"
6- #endif // GGML_USE_MUSA
6+ #endif // GGML_USE_MUSA && GGML_MUSA_MUDNN_COPY
77
88typedef void (*cpy_kernel_t )(const char * cx, char * cdst);
99
@@ -121,7 +121,7 @@ static __global__ void cpy_q_f32(const char * cx, char * cdst_direct, const int
 // Copy destination pointers to GPU to be available when pointer indirection is in use
 
 void ggml_cuda_cpy_dest_ptrs_copy(ggml_cuda_graph * cuda_graph, char ** host_dest_ptrs, const int host_dest_ptrs_size, cudaStream_t stream) {
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (cuda_graph->dest_ptrs_size < host_dest_ptrs_size) { // (re-)allocate GPU memory for destination pointers
         CUDA_CHECK(cudaStreamSynchronize(stream));
         if (cuda_graph->dest_ptrs_d != nullptr) {
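As the comment in the hunk states, this function stages the host-side array of destination pointers in device memory so that copy kernels can dereference them when pointer indirection is active; the guard now also covers MUSA graph builds. Below is a minimal standalone sketch of that staging pattern under plain CUDA allocations; `dest_ptr_buffer`, `check`, and `upload_dest_ptrs` are hypothetical names for illustration, not the ggml API.

```cpp
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Sketch only: abort on any CUDA error (stand-in for ggml's CUDA_CHECK).
static void check(cudaError_t err) {
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        abort();
    }
}

struct dest_ptr_buffer {
    char ** dest_ptrs_d    = nullptr; // device-side array of destination pointers
    int     dest_ptrs_size = 0;       // current capacity, in pointers
};

// (Re-)allocate the device buffer if the host array has grown, then upload it
// asynchronously so later kernels on the same stream see the indirection table.
static void upload_dest_ptrs(dest_ptr_buffer & buf, char ** host_dest_ptrs,
                             int host_dest_ptrs_size, cudaStream_t stream) {
    if (buf.dest_ptrs_size < host_dest_ptrs_size) {
        check(cudaStreamSynchronize(stream)); // nothing may still read the old buffer
        if (buf.dest_ptrs_d != nullptr) {
            check(cudaFree(buf.dest_ptrs_d));
        }
        check(cudaMalloc(&buf.dest_ptrs_d, host_dest_ptrs_size * sizeof(char *)));
        buf.dest_ptrs_size = host_dest_ptrs_size;
    }
    check(cudaMemcpyAsync(buf.dest_ptrs_d, host_dest_ptrs,
                          host_dest_ptrs_size * sizeof(char *),
                          cudaMemcpyHostToDevice, stream));
}
```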
@@ -314,7 +314,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 
     char ** dest_ptrs_d = nullptr;
     int graph_cpynode_index = -1;
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         dest_ptrs_d = ctx.cuda_graph->dest_ptrs_d;
         graph_cpynode_index = ctx.cuda_graph->graph_cpynode_index;
@@ -324,11 +324,11 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
 #endif
     if (src0->type == src1->type && ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
         GGML_ASSERT(ggml_nbytes(src0) == ggml_nbytes(src1));
-#ifdef GGML_USE_MUSA
+#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
         if (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16) {
             CUDA_CHECK(mudnnMemcpyAsync(ctx, src1, src0));
         } else
-#endif // GGML_USE_MUSA
+#endif // GGML_USE_MUSA && GGML_MUSA_MUDNN_COPY
         {
             CUDA_CHECK(cudaMemcpyAsync(src1_ddc, src0_ddc, ggml_nbytes(src0), cudaMemcpyDeviceToDevice, main_stream));
         }
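This fast path applies only when source and destination share a type and both tensors are contiguous: the copy then degenerates to a raw byte copy of ggml_nbytes(src0) on the current stream, and only MUSA builds with GGML_MUSA_MUDNN_COPY defined reroute F32/F16 tensors through mudnnMemcpyAsync. A minimal sketch of the generic fallback branch in isolation, assuming plain device pointers and leaving ggml's layout checks out of scope; the function name is hypothetical.

```cpp
#include <cuda_runtime.h>
#include <cstddef>

// Sketch only: contiguous same-type tensors need no conversion kernel, so the
// whole copy is a single device-to-device memcpy enqueued on the stream.
static cudaError_t copy_contiguous_same_type(char * dst, const char * src,
                                             size_t nbytes, cudaStream_t stream) {
    return cudaMemcpyAsync(dst, src, nbytes, cudaMemcpyDeviceToDevice, stream);
}
```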
@@ -379,7 +379,7 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
         GGML_ABORT("%s: unsupported type combination (%s to %s)\n", __func__,
                    ggml_type_name(src0->type), ggml_type_name(src1->type));
     }
-#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS)
+#if defined(GGML_CUDA_USE_GRAPHS) || defined(GGML_HIP_GRAPHS) || defined(GGML_MUSA_GRAPHS)
     if (ctx.cuda_graph->use_cpy_indirection && !disable_indirection_for_this_node) {
         ctx.cuda_graph->graph_cpynode_index = graph_cpynode_index;
     }
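Together with the earlier hunk that reads dest_ptrs_d and graph_cpynode_index out of ctx.cuda_graph, this write-back keeps the per-graph copy-node counter in sync across calls when indirection is enabled, and the same guard now covers MUSA graphs as well. A rough sketch of the save/restore pattern; cuda_graph_state and run_copy_node are hypothetical stand-ins for ggml_cuda_graph and the surrounding ggml_cuda_cpy logic.

```cpp
// Sketch only: each copy operation consumes one slot in the destination-pointer
// table; the index is read before launching and written back afterwards so the
// next copy node in the same graph capture continues where this one left off.
struct cuda_graph_state {               // hypothetical stand-in for ggml_cuda_graph
    bool    use_cpy_indirection = false;
    char ** dest_ptrs_d         = nullptr;
    int     graph_cpynode_index = 0;
};

static void run_copy_node(cuda_graph_state & graph, bool disable_indirection) {
    char ** dest_ptrs_d = nullptr;
    int graph_cpynode_index = -1;

    if (graph.use_cpy_indirection && !disable_indirection) {
        dest_ptrs_d         = graph.dest_ptrs_d;
        graph_cpynode_index = graph.graph_cpynode_index;
    }

    // ... launch the copy kernel here; it advances graph_cpynode_index ...
    (void) dest_ptrs_d;
    graph_cpynode_index++;              // placeholder for the kernel-side advance

    if (graph.use_cpy_indirection && !disable_indirection) {
        graph.graph_cpynode_index = graph_cpynode_index; // persist for the next node
    }
}
```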