@@ -11,7 +11,6 @@ BUILD_TARGETS = \
11
11
llama-embedding \
12
12
llama-eval-callback \
13
13
llama-export-lora \
14
- llama-finetune \
15
14
llama-gbnf-validator \
16
15
llama-gguf \
17
16
llama-gguf-hash \
@@ -37,7 +36,6 @@ BUILD_TARGETS = \
37
36
llama-simple \
38
37
llama-speculative \
39
38
llama-tokenize \
40
- llama-train-text-from-scratch \
41
39
llama-vdot \
42
40
llama-cvector-generator \
43
41
tests/test-c.o
@@ -64,13 +62,13 @@ TEST_TARGETS = \
64
62
tests/test-tokenizer-1-spm
65
63
66
64
# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
67
- LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
65
+ LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
68
66
simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
69
- retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
67
+ retrieval speculative infill tokenize benchmark-matmult parallel export-lora lookahead lookup passkey gritlm
70
68
71
69
# Legacy build targets that were renamed in #7809, but for which we still want to build binaries that output a deprecation warning if people try to use them.
72
70
# We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
73
- LEGACY_TARGETS_BUILD = main quantize perplexity embedding server finetune
71
+ LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
74
72
75
73
# Deprecation aliases
76
74
ifdef LLAMA_CUBLAS
@@ -327,9 +325,9 @@ ifdef LLAMA_DEBUG
327
325
endif
328
326
else
329
327
MK_CPPFLAGS += -DNDEBUG
330
- MK_CFLAGS += -O3
331
- MK_CXXFLAGS += -O3
332
- MK_NVCCFLAGS += -O3
328
+ MK_CFLAGS += -O3 -g
329
+ MK_CXXFLAGS += -O3 -g
330
+ MK_NVCCFLAGS += -O3 -g
333
331
endif
334
332
335
333
ifdef LLAMA_SANITIZE_THREAD
@@ -530,10 +528,21 @@ ifndef GGML_NO_ACCELERATE
530
528
endif
531
529
endif # GGML_NO_ACCELERATE
532
530
531
+ ifdef GGML_MUSA
532
+ CC := clang
533
+ CXX := clang++
534
+ GGML_CUDA := 1
535
+ MK_CPPFLAGS += -DGGML_USE_MUSA
536
+ endif
537
+
533
538
ifndef GGML_NO_OPENMP
534
539
MK_CPPFLAGS += -DGGML_USE_OPENMP
535
540
MK_CFLAGS += -fopenmp
536
541
MK_CXXFLAGS += -fopenmp
542
+ ifdef GGML_MUSA
543
+ MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
544
+ MK_LDFLAGS += -L/usr/lib/llvm-10/lib
545
+ endif # GGML_MUSA
537
546
endif # GGML_NO_OPENMP
538
547
539
548
ifdef GGML_OPENBLAS
@@ -584,15 +593,27 @@ else
584
593
endif # GGML_CUDA_FA_ALL_QUANTS
585
594
586
595
ifdef GGML_CUDA
587
- ifneq ('', '$(wildcard /opt/cuda)')
588
- CUDA_PATH ?= /opt/cuda
596
+ ifdef GGML_MUSA
597
+ ifneq ('', '$(wildcard /opt/musa)')
598
+ CUDA_PATH ?= /opt/musa
599
+ else
600
+ CUDA_PATH ?= /usr/local/musa
601
+ endif
602
+
603
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
604
+ MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
605
+ MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_22
589
606
else
590
- CUDA_PATH ?= /usr/local/cuda
591
- endif
607
+ ifneq ('', '$(wildcard /opt/cuda)')
608
+ CUDA_PATH ?= /opt/cuda
609
+ else
610
+ CUDA_PATH ?= /usr/local/cuda
611
+ endif
592
612
593
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
594
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
595
- MK_NVCCFLAGS += -use_fast_math
613
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
614
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
615
+ MK_NVCCFLAGS += -use_fast_math
616
+ endif # GGML_MUSA
596
617
597
618
OBJ_GGML += ggml/src/ggml-cuda.o
598
619
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
@@ -602,9 +623,11 @@ ifdef LLAMA_FATAL_WARNINGS
602
623
MK_NVCCFLAGS += -Werror all-warnings
603
624
endif # LLAMA_FATAL_WARNINGS
604
625
626
+ ifndef GGML_MUSA
605
627
ifndef JETSON_EOL_MODULE_DETECT
606
628
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
607
629
endif # JETSON_EOL_MODULE_DETECT
630
+ endif # GGML_MUSA
608
631
609
632
ifdef LLAMA_DEBUG
610
633
MK_NVCCFLAGS += -lineinfo
@@ -617,8 +640,12 @@ endif # GGML_CUDA_DEBUG
617
640
ifdef GGML_CUDA_NVCC
618
641
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
619
642
else
620
- NVCC = $(CCACHE) nvcc
621
- endif # GGML_CUDA_NVCC
643
+ ifdef GGML_MUSA
644
+ NVCC = $(CCACHE) mcc
645
+ else
646
+ NVCC = $(CCACHE) nvcc
647
+ endif # GGML_MUSA
648
+ endif # GGML_CUDA_NVCC
622
649
623
650
ifdef CUDA_DOCKER_ARCH
624
651
MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
@@ -689,9 +716,15 @@ define NVCC_COMPILE
689
716
$(NVCC ) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS ) $(CPPFLAGS ) -Xcompiler "$(CUDA_CXXFLAGS ) " -c $< -o $@
690
717
endef # NVCC_COMPILE
691
718
else
719
+ ifdef GGML_MUSA
720
+ define NVCC_COMPILE
721
+ $(NVCC ) $(NVCCFLAGS ) $(CPPFLAGS ) -c $< -o $@
722
+ endef # NVCC_COMPILE
723
+ else
692
724
define NVCC_COMPILE
693
725
$(NVCC ) $(NVCCFLAGS ) $(CPPFLAGS ) -Xcompiler "$(CUDA_CXXFLAGS ) " -c $< -o $@
694
726
endef # NVCC_COMPILE
727
+ endif # GGML_MUSA
695
728
endif # JETSON_EOL_MODULE_DETECT
696
729
697
730
ggml/src/ggml-cuda/% .o : \
@@ -946,6 +979,7 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
946
979
ifdef GGML_CUDA
947
980
$(info I NVCC : $(shell $(NVCC ) --version | tail -n 1) )
948
981
CUDA_VERSION := $(shell $(NVCC ) --version | grep -oP 'release (\K[0-9]+\.[0-9]) ')
982
+ ifndef GGML_MUSA
949
983
ifeq ($(shell awk -v "v=$(CUDA_VERSION ) " 'BEGIN { print (v < 11.7) }'),1)
950
984
951
985
ifndef CUDA_DOCKER_ARCH
@@ -955,6 +989,7 @@ endif # CUDA_POWER_ARCH
955
989
endif # CUDA_DOCKER_ARCH
956
990
957
991
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
992
+ endif # GGML_MUSA
958
993
endif # GGML_CUDA
959
994
$(info )
960
995
@@ -1296,11 +1331,6 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
1296
1331
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1297
1332
$(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1298
1333
1299
- llama-train-text-from-scratch : examples/train-text-from-scratch/train-text-from-scratch.cpp \
1300
- $(OBJ_ALL )
1301
- $(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1302
- $(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1303
-
1304
1334
llama-convert-llama2c-to-ggml : examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
1305
1335
$(OBJ_GGML ) $(OBJ_LLAMA )
1306
1336
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
@@ -1316,13 +1346,8 @@ llama-baby-llama: examples/baby-llama/baby-llama.cpp \
1316
1346
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1317
1347
$(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1318
1348
1319
- llama-finetune : examples/finetune/finetune.cpp \
1320
- $(OBJ_ALL )
1321
- $(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1322
- $(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1323
-
1324
1349
llama-export-lora : examples/export-lora/export-lora.cpp \
1325
- $(OBJ_GGML ) common/log.h
1350
+ $(OBJ_ALL )
1326
1351
$(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
1327
1352
$(CXX ) $(CXXFLAGS ) $(filter-out % .h $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1328
1353
@@ -1578,7 +1603,7 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
1578
1603
# Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed.
1579
1604
#
1580
1605
# Mark legacy binary targets as .PHONY so that they are always checked.
1581
- .PHONY : main quantize perplexity embedding server finetune
1606
+ .PHONY : main quantize perplexity embedding server
1582
1607
1583
1608
# NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
1584
1609
# Eventually we will want to remove these targets from building all the time.
@@ -1621,13 +1646,3 @@ ifneq (,$(wildcard embedding))
1621
1646
@echo " Remove the 'embedding' binary to remove this warning."
1622
1647
@echo "# ########"
1623
1648
endif
1624
-
1625
- finetune : examples/deprecation-warning/deprecation-warning.cpp
1626
- ifneq (,$(wildcard finetune) )
1627
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1628
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1629
- @echo "# ########"
1630
- @echo "WARNING: The 'finetune' binary is deprecated. Please use 'llama-finetune' instead."
1631
- @echo " Remove the 'finetune' binary to remove this warning."
1632
- @echo "# ########"
1633
- endif
0 commit comments