@@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE
531531	endif
532532endif  #  GGML_NO_ACCELERATE
533533
534- ifdef  GGML_MUSA 
535- 	CC := clang
536- 	CXX := clang++
537- 	GGML_CUDA := 1
538- 	MK_CPPFLAGS += -DGGML_USE_MUSA
539- endif 
540- 
541534ifndef  GGML_NO_OPENMP 
542535	MK_CPPFLAGS += -DGGML_USE_OPENMP
543536	MK_CFLAGS   += -fopenmp
544537	MK_CXXFLAGS += -fopenmp
545- 	ifdef GGML_MUSA
546- 		MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
547- 		MK_LDFLAGS  += -L/usr/lib/llvm-10/lib
548- 	endif #  GGML_MUSA
549538endif  #  GGML_NO_OPENMP
550539
551540ifdef  GGML_OPENBLAS 
@@ -601,27 +590,15 @@ else
601590endif  #  GGML_CUDA_FA_ALL_QUANTS
602591
603592ifdef  GGML_CUDA 
604- 	ifdef GGML_MUSA
605- 		ifneq ('', '$(wildcard /opt/musa)')
606- 			CUDA_PATH ?= /opt/musa
607- 		else
608- 			CUDA_PATH ?= /usr/local/musa
609- 		endif
610- 
611- 		MK_CPPFLAGS  += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
612- 		MK_LDFLAGS   += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
613- 		MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
593+ 	ifneq ('', '$(wildcard /opt/cuda)')
594+ 		CUDA_PATH ?= /opt/cuda
614595	else
615- 		ifneq ('', '$(wildcard /opt/cuda)')
616- 			CUDA_PATH ?= /opt/cuda
617- 		else
618- 			CUDA_PATH ?= /usr/local/cuda
619- 		endif
596+ 		CUDA_PATH ?= /usr/local/cuda
597+ 	endif
620598
621- 		MK_CPPFLAGS  += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
622- 		MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
623- 		MK_NVCCFLAGS += -use_fast_math
624- 	endif #  GGML_MUSA
599+ 	MK_CPPFLAGS  += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
600+ 	MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
601+ 	MK_NVCCFLAGS += -use_fast_math
625602
626603	OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
627604	OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
@@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS
631608	MK_NVCCFLAGS += -Werror all-warnings
632609endif  #  LLAMA_FATAL_WARNINGS
633610
634- ifndef  GGML_MUSA 
635611ifndef  JETSON_EOL_MODULE_DETECT 
636612	MK_NVCCFLAGS += --forward-unknown-to-host-compiler
637613endif  #  JETSON_EOL_MODULE_DETECT
638- endif  #  GGML_MUSA
639614
640615ifdef  LLAMA_DEBUG 
641616	MK_NVCCFLAGS += -lineinfo
@@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG
648623ifdef  GGML_CUDA_NVCC 
649624	NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
650625else
651- 	ifdef GGML_MUSA
652- 		NVCC = $(CCACHE) mcc
653- 	else
654- 		NVCC = $(CCACHE) nvcc
655- 	endif #  GGML_MUSA
626+ 	NVCC = $(CCACHE) nvcc
656627endif  #  GGML_CUDA_NVCC
657628
658629ifdef  CUDA_DOCKER_ARCH 
@@ -724,15 +695,9 @@ define NVCC_COMPILE
724695	$(NVCC )  -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS )  $(CPPFLAGS )  -Xcompiler "$(CUDA_CXXFLAGS ) " -c $<  -o $@ 
725696endef  #  NVCC_COMPILE
726697else
727- 	ifdef GGML_MUSA
728- define  NVCC_COMPILE 
729- 	$(NVCC )  $(NVCCFLAGS )  $(CPPFLAGS )  -c $<  -o $@ 
730- endef  #  NVCC_COMPILE
731- 	else
732698define  NVCC_COMPILE 
733699	$(NVCC )  $(NVCCFLAGS )  $(CPPFLAGS )  -Xcompiler "$(CUDA_CXXFLAGS ) " -c $<  -o $@ 
734700endef  #  NVCC_COMPILE
735- 	endif #  GGML_MUSA
736701endif  #  JETSON_EOL_MODULE_DETECT
737702
738703ggml/src/ggml-cuda/% .o : \ 
@@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \
874839	$(HIPCC )  $(CXXFLAGS )  $(HIPFLAGS )  -x hip -c -o $@  $< 
875840endif  #  GGML_HIPBLAS
876841
842+ ifdef  GGML_MUSA 
843+ 	ifeq ($(wildcard /opt/musa),)
844+ 		MUSA_PATH ?= /usr/local/musa
845+ 	else
846+ 		MUSA_PATH ?= /opt/musa
847+ 	endif
848+ 	MTGPU_TARGETS ?= mp_21 mp_22
849+ 
850+ 	MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
851+ 	MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
852+ 	MK_LDFLAGS += -lmusa -lmusart -lmublas
853+ 
854+ 	ifndef GGML_NO_OPENMP
855+ 		#  For Ubuntu Focal
856+ 		MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
857+ 		MK_LDFLAGS  += -L/usr/lib/llvm-10/lib
858+ 		#  For Ubuntu Jammy
859+ 		MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
860+ 		MK_LDFLAGS  += -L/usr/lib/llvm-14/lib
861+ 	endif #  GGML_NO_OPENMP
862+ 
863+ 	CC  := $(MUSA_PATH)/bin/clang
864+ 	CXX := $(MUSA_PATH)/bin/clang++
865+ 	MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
866+ 
867+ 	MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS))
868+ 
869+ ifdef  GGML_CUDA_FORCE_DMMV 
870+ 	MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV
871+ endif  #  GGML_CUDA_FORCE_DMMV
872+ 
873+ ifdef  GGML_CUDA_FORCE_MMQ 
874+ 	MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
875+ endif  #  GGML_CUDA_FORCE_MMQ
876+ 
877+ ifdef  GGML_CUDA_FORCE_CUBLAS 
878+ 	MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
879+ endif  #  GGML_CUDA_FORCE_CUBLAS
880+ 
881+ ifdef  GGML_CUDA_DMMV_X 
882+ 	MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
883+ else
884+ 	MUSAFLAGS += -DGGML_CUDA_DMMV_X=32
885+ endif  #  GGML_CUDA_DMMV_X
886+ 
887+ ifdef  GGML_CUDA_MMV_Y 
888+ 	MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
889+ else
890+ 	MUSAFLAGS += -DGGML_CUDA_MMV_Y=1
891+ endif  #  GGML_CUDA_MMV_Y
892+ 
893+ ifdef  GGML_CUDA_F16 
894+ 	MUSAFLAGS += -DGGML_CUDA_F16
895+ endif  #  GGML_CUDA_F16
896+ 
897+ ifdef  GGML_CUDA_DMMV_F16 
898+ 	MUSAFLAGS += -DGGML_CUDA_F16
899+ endif  #  GGML_CUDA_DMMV_F16
900+ 
901+ ifdef  GGML_CUDA_KQUANTS_ITER 
902+ 	MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
903+ else
904+ 	MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2
905+ endif 
906+ 
907+ ifdef  GGML_CUDA_PEER_MAX_BATCH_SIZE 
908+ 	MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
909+ else
910+ 	MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
911+ endif  #  GGML_CUDA_PEER_MAX_BATCH_SIZE
912+ 
913+ ifdef  GGML_CUDA_NO_PEER_COPY 
914+ 	MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
915+ endif  #  GGML_CUDA_NO_PEER_COPY
916+ 
917+ ifdef  GGML_CUDA_FA_ALL_QUANTS 
918+ 	MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
919+ endif  #  GGML_CUDA_FA_ALL_QUANTS
920+ 
921+ 	OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
922+ 	OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
923+ 	OBJ_GGML += $(OBJ_CUDA_TMPL)
924+ 
925+ ggml/src/ggml-cuda/ggml-cuda.o : \ 
926+ 	ggml/src/ggml-cuda/ggml-cuda.cu \ 
927+ 	ggml/include/ggml-cuda.h \ 
928+ 	ggml/include/ggml.h \ 
929+ 	ggml/include/ggml-backend.h \ 
930+ 	ggml/src/ggml-backend-impl.h \ 
931+ 	ggml/src/ggml-common.h \ 
932+ 	$(wildcard  ggml/src/ggml-cuda/* .cuh) 
933+ 	$(MCC )  $(CXXFLAGS )  $(MUSAFLAGS )  -x musa -mtgpu -c -o $@  $< 
934+ 
935+ ggml/src/ggml-cuda/% .o : \ 
936+ 	ggml/src/ggml-cuda/%.cu \ 
937+ 	ggml/include/ggml.h \ 
938+ 	ggml/src/ggml-common.h \ 
939+ 	ggml/src/ggml-cuda/common.cuh
940+ 	$(MCC )  $(CXXFLAGS )  $(MUSAFLAGS )  -x musa -mtgpu -c -o $@  $< 
941+ endif  #  GGML_MUSA
942+ 
877943ifdef  GGML_METAL 
878944	MK_CPPFLAGS += -DGGML_USE_METAL
879945	MK_LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit
@@ -1002,7 +1068,6 @@ $(info I CXX:       $(shell $(CXX)  --version | head -n 1))
10021068ifdef  GGML_CUDA 
10031069$(info  I NVCC  :      $(shell  $(NVCC )  --version | tail -n 1) )
10041070CUDA_VERSION  := $(shell  $(NVCC )  --version | grep -oP 'release (\K[0-9]+\.[0-9]) ')
1005- ifndef  GGML_MUSA 
10061071ifeq  ($(shell  awk -v "v=$(CUDA_VERSION ) " 'BEGIN { print (v < 11.7)  }'),1)
10071072
10081073ifndef  CUDA_DOCKER_ARCH 
@@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH
10121077endif  #  CUDA_DOCKER_ARCH
10131078
10141079endif  #  eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
1015- endif  #  GGML_MUSA
10161080endif  #  GGML_CUDA
10171081$(info )
10181082
@@ -1283,6 +1347,7 @@ clean:
12831347	rm -vrf ggml/src/ggml-rpc/* .o
12841348	rm -vrf ggml/src/ggml-sycl/* .o
12851349	rm -vrf ggml/src/ggml-vulkan/* .o
1350+ 	rm -vrf ggml/src/ggml-musa/* .o
12861351	rm -rvf $(BUILD_TARGETS ) 
12871352	rm -rvf $(TEST_TARGETS ) 
12881353	rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp
0 commit comments