@@ -26,20 +26,6 @@ ifndef UNAME_M
2626UNAME_M := $(shell uname -m)
2727endif
2828
# Removed by this change: inline detection of the C compiler family and version.
# Sets CC_IS_GCC, or CC_IS_CLANG plus one of CC_IS_LLVM_CLANG / CC_IS_APPLE_CLANG,
# depending on whether "clang" / "Apple" appear in the `$(CC) --version` output.
# CC_VER is the version as three zero-padded two-digit fields (major, minor, patch),
# so it can be compared numerically with `expr`.
29- ifeq '' '$(findstring clang,$(shell $(CC) --version))'
30- CC_IS_GCC=1
31- CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
32- else
33- CC_IS_CLANG=1
34- ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
35- CC_IS_LLVM_CLANG=1
36- else
37- CC_IS_APPLE_CLANG=1
38- endif
39- CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
40- | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
41- endif
42-
4329# Mac OS + Arm can report x86_64
4430# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
4531ifeq ($(UNAME_S ) ,Darwin)
@@ -121,12 +107,12 @@ MK_CXXFLAGS = -std=c++11 -fPIC
121107
122108# -Ofast tends to produce faster code, but may not be available for some compilers.
# Optimization level selection.
# With LLAMA_FAST defined: C gets -Ofast, the host C++ flags get -Ofast and the nvcc flags get -O3.
# Otherwise: C and C++ both get -O3.
# The "+" lines retarget the LLAMA_FAST additions:
#   MK_HOST_CXXFLAGS -> HOST_CXXFLAGS, MK_CUDA_CXXFLAGS -> MK_NVCCFLAGS.
# NOTE(review): the -/+ pairs for MK_CFLAGS / MK_CXXFLAGS read identically here,
# presumably a whitespace-only realignment — confirm against the raw patch.
123109ifdef LLAMA_FAST
124- MK_CFLAGS += -Ofast
125- MK_HOST_CXXFLAGS += -Ofast
126- MK_CUDA_CXXFLAGS += -O3
110+ MK_CFLAGS += -Ofast
111+ HOST_CXXFLAGS += -Ofast
112+ MK_NVCCFLAGS += -O3
127113else
128- MK_CFLAGS += -O3
129- MK_CXXFLAGS += -O3
114+ MK_CFLAGS += -O3
115+ MK_CXXFLAGS += -O3
130116endif
131117
132118# clock_gettime came in POSIX.1b (1993)
@@ -220,30 +206,6 @@ MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmis
220206 -Werror=implicit-function-declaration
# C++ warnings: the shared WARN_FLAGS plus two C++-specific diagnostics.
221207MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
222208
# Removed by this change: compiler-specific warning flags keyed on CC_IS_CLANG and CC_VER.
# CC_VER is compared as a zero-padded MMmmpp number (e.g. 030800 = 3.8.0).
#   clang: unreachable-code warnings for C and host C++, plus -Wmissing-prototypes and
#          -Wextra-semi for host C++; -Wdouble-promotion only for LLVM clang >= 3.8.0
#          or Apple clang >= 7.3.0.
#   gcc:   -Wdouble-promotion for C and -Wno-array-bounds for host C++;
#          -Wno-format-truncation from 7.1.0 and -Wextra-semi from 8.1.0.
223- ifeq ($(CC_IS_CLANG), 1)
224- # clang options
225- MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
226- MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
227-
228- ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
229- MK_CFLAGS += -Wdouble-promotion
230- endif
231- ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
232- MK_CFLAGS += -Wdouble-promotion
233- endif
234- else
235- # gcc options
236- MK_CFLAGS += -Wdouble-promotion
237- MK_HOST_CXXFLAGS += -Wno-array-bounds
238-
239- ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
240- MK_HOST_CXXFLAGS += -Wno-format-truncation
241- endif
242- ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
243- MK_HOST_CXXFLAGS += -Wextra-semi
244- endif
245- endif
246-
247209# this version of Apple ld64 is buggy
248210ifneq '' '$(findstring dyld-1015.7,$(shell $(CC ) $(LDFLAGS ) -Wl,-v 2>&1) ) '
249211 MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
@@ -294,8 +256,8 @@ ifndef RISCV
294256
295257ifeq ($(UNAME_M ) ,$(filter $(UNAME_M ) ,x86_64 i686 amd64) )
296258 # Use all CPU extensions that are available:
297- MK_CFLAGS += -march=native -mtune=native
298- MK_HOST_CXXFLAGS += -march=native -mtune=native
259+ MK_CFLAGS += -march=native -mtune=native
260+ HOST_CXXFLAGS += -march=native -mtune=native
299261
300262 # Usage AVX-only
301263 # MK_CFLAGS += -mfma -mf16c -mavx
@@ -398,10 +360,10 @@ ifdef LLAMA_CUBLAS
# cuBLAS backend: preprocessor define and include paths, CUDA link libraries and search paths,
# the ggml-cuda.o object, and the base nvcc flags.
# MK_NVCCFLAGS is appended to with += (not assigned with =) so that flags added to it earlier
# in the file — e.g. -O3 in the LLAMA_FAST branch — are not discarded.
398360 MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
399361 MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
400362 OBJS += ggml-cuda.o
401- NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
363+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler -use_fast_math
402364
# Debug builds additionally pass -lineinfo to nvcc.
# The flag now accumulates in MK_NVCCFLAGS instead of NVCCFLAGS.
403365ifdef LLAMA_DEBUG
404- NVCCFLAGS += -lineinfo
366+ MK_NVCCFLAGS += -lineinfo
405367endif
406368
407369ifdef LLAMA_CUDA_NVCC
@@ -410,54 +372,52 @@ else
410372 NVCC = nvcc
411373endif # LLAMA_CUDA_NVCC
# GPU architecture selection for nvcc:
#   CUDA_DOCKER_ARCH defined -> -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
#   CUDA_POWER_ARCH defined  -> no -arch flag is added
#   neither defined          -> -arch=native
# The new form folds the old empty "else ifdef CUDA_POWER_ARCH" branch and its trailing
# "else" into a single "else ifndef CUDA_POWER_ARCH".
412374ifdef CUDA_DOCKER_ARCH
413- NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
414- else ifdef CUDA_POWER_ARCH
415- NVCCFLAGS +=
416- else
417- NVCCFLAGS += -arch=native
375+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
376+ else ifndef CUDA_POWER_ARCH
377+ MK_NVCCFLAGS += -arch=native
418378endif # CUDA_DOCKER_ARCH
# Optional switch: LLAMA_CUDA_FORCE_DMMV adds the GGML_CUDA_FORCE_DMMV define to the nvcc flags.
419379ifdef LLAMA_CUDA_FORCE_DMMV
420- NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
380+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
421381endif # LLAMA_CUDA_FORCE_DMMV
# Optional switch: LLAMA_CUDA_FORCE_MMQ adds the GGML_CUDA_FORCE_MMQ define to the nvcc flags.
422382ifdef LLAMA_CUDA_FORCE_MMQ
423- NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
383+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
424384endif # LLAMA_CUDA_FORCE_MMQ
# GGML_CUDA_DMMV_X is always defined: taken from LLAMA_CUDA_DMMV_X when set, otherwise 32.
425385ifdef LLAMA_CUDA_DMMV_X
426- NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
386+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
427387else
428- NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
388+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
429389endif # LLAMA_CUDA_DMMV_X
# GGML_CUDA_MMV_Y is always defined. Precedence: LLAMA_CUDA_MMV_Y, then the older
# LLAMA_CUDA_DMMV_Y name (kept for backwards compatibility), then the default 1.
430390ifdef LLAMA_CUDA_MMV_Y
431- NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
391+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
432392else ifdef LLAMA_CUDA_DMMV_Y
433- NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
393+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
434394else
435- NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
395+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
436396endif # LLAMA_CUDA_MMV_Y
# LLAMA_CUDA_F16 and LLAMA_CUDA_DMMV_F16 both add the same define, GGML_CUDA_F16.
# If both are set, the define is simply passed to nvcc twice.
437397ifdef LLAMA_CUDA_F16
438- NVCCFLAGS += -DGGML_CUDA_F16
398+ MK_NVCCFLAGS += -DGGML_CUDA_F16
439399endif # LLAMA_CUDA_F16
440400ifdef LLAMA_CUDA_DMMV_F16
441- NVCCFLAGS += -DGGML_CUDA_F16
401+ MK_NVCCFLAGS += -DGGML_CUDA_F16
442402endif # LLAMA_CUDA_DMMV_F16
# K_QUANTS_PER_ITERATION is always defined: taken from LLAMA_CUDA_KQUANTS_ITER when set, otherwise 2.
443403ifdef LLAMA_CUDA_KQUANTS_ITER
444- NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
404+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
445405else
446- NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
406+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
447407endif
# GGML_CUDA_PEER_MAX_BATCH_SIZE is always defined: taken from LLAMA_CUDA_PEER_MAX_BATCH_SIZE
# when set, otherwise 128.
448408ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE
449- NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
409+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE)
450410else
451- NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
411+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
452412endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
453413# ifdef LLAMA_CUDA_CUBLAS
454- # NVCCFLAGS += -DGGML_CUDA_CUBLAS
414+ # MK_NVCCFLAGS += -DGGML_CUDA_CUBLAS
455415# endif # LLAMA_CUDA_CUBLAS
# When LLAMA_CUDA_CCBIN is set, its value is handed to nvcc through the -ccbin option.
456416ifdef LLAMA_CUDA_CCBIN
457- NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
417+ MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
458418endif
# Compile the CUDA backend with nvcc.
# New recipe: nvcc receives BASE_CXXFLAGS (the C++ flags without the host-only additions),
# then NVCCFLAGS, and the CUDA_CXXFLAGS are passed as one quoted -Xcompiler argument.
# These variables are assigned further down the file; that is fine because recipe text
# is only expanded when the rule runs.
459419ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
460- $(NVCC) $(NVCCFLAGS) -c $< -o $@
420+ $(NVCC) $(BASE_CXXFLAGS) $(NVCCFLAGS) -Wno-pedantic -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
461421endif # LLAMA_CUBLAS
462422
463423ifdef LLAMA_CLBLAST
@@ -519,16 +479,22 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
519479 $(CC ) $(CFLAGS ) -c $< -o $@
520480endif # LLAMA_MPI
521481
# Probe the C compiler: GF_CC names the compiler for scripts/get-flags.mk (not shown here).
# The GF_CFLAGS / GF_CXXFLAGS used when the build flags are combined are presumably
# defined by that script — verify there.
482+ GF_CC := $(CC)
483+ include scripts/get-flags.mk
484+
522485# combine build flags with cmdline overrides
# Final flag assembly. `override` lets the Makefile's own flags be combined with values
# given on the command line; in each list the user-supplied variable comes last.
# Old scheme ("-" lines): CUDA_CXXFLAGS and HOST_CXXFLAGS were merged with MK_* counterparts,
# and NVCCFLAGS was built from CXXFLAGS before the host-only options were appended.
# New scheme ("+" lines):
#   CFLAGS        - now also includes GF_CFLAGS
#   BASE_CXXFLAGS - C++ flags without host-only options; must be captured before CXXFLAGS
#                   is overridden, because := expands immediately
#   CXXFLAGS      - BASE_CXXFLAGS plus HOST_CXXFLAGS and GF_CXXFLAGS
#   NVCCFLAGS     - MK_NVCCFLAGS followed by any user-supplied NVCCFLAGS
523- override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(CFLAGS)
524- override CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
525- override CUDA_CXXFLAGS := $(MK_CUDA_CXXFLAGS) $(CUDA_CXXFLAGS)
526- override HOST_CXXFLAGS := $(MK_HOST_CXXFLAGS) $(HOST_CXXFLAGS)
527- override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
528-
529- # save CXXFLAGS before we add host-only options
530- NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
531- override CXXFLAGS += $(HOST_CXXFLAGS)
486+ override CFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
487+ BASE_CXXFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) $(MK_CXXFLAGS) $(CXXFLAGS)
488+ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS)
489+ override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
490+ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
491+
492+ # identify CUDA host compiler
# For cuBLAS builds, scripts/get-flags.mk is included a second time with GF_CC set to an
# nvcc command line ending in -Xcompiler; the resulting GF_CXXFLAGS become CUDA_CXXFLAGS.
# NOTE(review): presumably whatever the script appends to $(GF_CC) is forwarded by
# -Xcompiler to nvcc's host compiler, and `2>/dev/null .c` silences nvcc and supplies a
# placeholder input name — confirm against scripts/get-flags.mk.
# This must stay after NVCCFLAGS is finalized, since := expands it immediately.
493+ ifdef LLAMA_CUBLAS
494+ GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
495+ include scripts/get-flags.mk
496+ CUDA_CXXFLAGS := $(GF_CXXFLAGS)
497+ endif
532498
533499#
534500# Print build information
0 commit comments