@@ -81,6 +81,7 @@ FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
8181
8282# these are used on windows, to build some libraries with extra old device compatibility
8383SIMPLECFLAGS =
84+ SIMPLERCFLAGS =
8485FULLCFLAGS =
8586NONECFLAGS =
8687
@@ -97,6 +98,7 @@ CUBLAS_OBJS =
9798
9899OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
99100OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
101+ OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o
100102OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
101103
102104# OS specific
@@ -153,11 +155,12 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
153155 ifdef LLAMA_PORTABLE
154156 CFLAGS +=
155157 NONECFLAGS +=
156- SIMPLECFLAGS += -mavx -msse3
158+ SIMPLECFLAGS += -mavx -msse3 -mssse3
159+ SIMPLERCFLAGS += -msse3 -mssse3
157160 ifdef LLAMA_NOAVX2
158- FULLCFLAGS += -msse3 -mavx
161+ FULLCFLAGS += -msse3 -mssse3 -mavx
159162 else
160- FULLCFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx
163+ FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
161164 endif
162165 else
163166 CFLAGS += -march=native -mtune=native
@@ -166,11 +169,12 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
166169 ifdef LLAMA_PORTABLE
167170 CFLAGS +=
168171 NONECFLAGS +=
169- SIMPLECFLAGS += -mavx -msse3
172+ SIMPLECFLAGS += -mavx -msse3 -mssse3
173+ SIMPLERCFLAGS += -msse3 -mssse3
170174 ifdef LLAMA_NOAVX2
171- FULLCFLAGS += -msse3 -mavx
175+ FULLCFLAGS += -msse3 -mssse3 -mavx
172176 else
173- FULLCFLAGS += -mavx2 -msse3 -mfma -mf16c -mavx
177+ FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx
174178 endif
175179 else
176180 CFLAGS += -march=native -mtune=native
@@ -512,7 +516,7 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
512516ggml_v4_cublas.o : ggml/src/ggml.c ggml/include/ggml.h
513517 $(CC ) $(FASTCFLAGS ) $(FULLCFLAGS ) $(CUBLAS_FLAGS ) $(HIPFLAGS ) -c $< -o $@
514518ggml_v4_clblast_noavx2.o : ggml/src/ggml.c ggml/include/ggml.h
515- $(CC ) $(FASTCFLAGS ) $(SIMPLECFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
519+ $(CC ) $(FASTCFLAGS ) $(SIMPLERCFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
516520ggml_v4_vulkan.o : ggml/src/ggml.c ggml/include/ggml.h
517521 $(CC ) $(FASTCFLAGS ) $(FULLCFLAGS ) $(VULKAN_FLAGS ) -c $< -o $@
518522ggml_v4_vulkan_noavx2.o : ggml/src/ggml.c ggml/include/ggml.h
@@ -528,19 +532,23 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml/
528532ggml-cpu_v4_clblast.o : ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
529533 $(CC ) $(FASTCFLAGS ) $(FULLCFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
530534ggml-cpu_v4_clblast_noavx2.o : ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
531- $(CC ) $(FASTCFLAGS ) $(SIMPLECFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
535+ $(CC ) $(FASTCFLAGS ) $(SIMPLERCFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
532536
533537# quants
534538ggml-quants.o : ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
535539 $(CC ) $(CFLAGS ) $(FULLCFLAGS ) -c $< -o $@
536540ggml-quants_noavx2.o : ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
537541 $(CC ) $(CFLAGS ) $(SIMPLECFLAGS ) -c $< -o $@
542+ ggml-quants_noavx1.o : ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
543+ $(CC ) $(CFLAGS ) $(SIMPLERCFLAGS ) -c $< -o $@
538544ggml-quants_failsafe.o : ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
539545 $(CC ) $(CFLAGS ) $(NONECFLAGS ) -c $< -o $@
540546ggml-cpu-quants.o : ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
541547 $(CC ) $(CFLAGS ) $(FULLCFLAGS ) -c $< -o $@
542548ggml-cpu-quants_noavx2.o : ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
543549 $(CC ) $(CFLAGS ) $(SIMPLECFLAGS ) -c $< -o $@
550+ ggml-cpu-quants_noavx1.o : ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
551+ $(CC ) $(CFLAGS ) $(SIMPLERCFLAGS ) -c $< -o $@
544552ggml-cpu-quants_failsafe.o : ggml/src/ggml-cpu/ggml-cpu-quants.c ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-quants.h ggml/src/ggml-common.h ggml/src/iqk_croco/iqk_quantize_croco.cpp ggml/src/iqk_croco/iqk_quantize_croco.h
545553 $(CC ) $(CFLAGS ) $(NONECFLAGS ) -c $< -o $@
546554
@@ -549,6 +557,8 @@ ggml-cpu-aarch64.o: ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h g
549557 $(CXX ) $(CXXFLAGS ) $(FULLCFLAGS ) -c $< -o $@
550558ggml-cpu-aarch64_noavx2.o : ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
551559 $(CXX ) $(CXXFLAGS ) $(SIMPLECFLAGS ) -c $< -o $@
560+ ggml-cpu-aarch64_noavx1.o : ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
561+ $(CXX ) $(CXXFLAGS ) $(SIMPLERCFLAGS ) -c $< -o $@
552562ggml-cpu-aarch64_failsafe.o : ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp ggml/include/ggml.h ggml/src/ggml-cpu/ggml-cpu-aarch64.h
553563 $(CXX ) $(CXXFLAGS ) $(NONECFLAGS ) -c $< -o $@
554564
@@ -557,6 +567,8 @@ sgemm.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm
557567 $(CXX ) $(CXXFLAGS ) $(FULLCFLAGS ) -c $< -o $@
558568sgemm_noavx2.o : ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
559569 $(CXX ) $(CXXFLAGS ) $(SIMPLECFLAGS ) -c $< -o $@
570+ sgemm_noavx1.o : ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
571+ $(CXX ) $(CXXFLAGS ) $(SIMPLERCFLAGS ) -c $< -o $@
560572sgemm_failsafe.o : ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamafile/sgemm.h ggml/include/ggml.h
561573 $(CXX ) $(CXXFLAGS ) $(NONECFLAGS ) -c $< -o $@
562574
@@ -612,7 +624,7 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
612624ggml_v3_cublas.o : otherarch/ggml_v3.c otherarch/ggml_v3.h
613625 $(CC ) $(FASTCFLAGS ) $(FULLCFLAGS ) $(CUBLAS_FLAGS ) $(HIPFLAGS ) -c $< -o $@
614626ggml_v3_clblast_noavx2.o : otherarch/ggml_v3.c otherarch/ggml_v3.h
615- $(CC ) $(FASTCFLAGS ) $(SIMPLECFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
627+ $(CC ) $(FASTCFLAGS ) $(SIMPLERCFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
616628
617629# version 2 libs
618630ggml_v2.o : otherarch/ggml_v2.c otherarch/ggml_v2.h
@@ -626,7 +638,7 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
626638ggml_v2_cublas.o : otherarch/ggml_v2.c otherarch/ggml_v2.h
627639 $(CC ) $(FASTCFLAGS ) $(FULLCFLAGS ) $(CUBLAS_FLAGS ) $(HIPFLAGS ) -c $< -o $@
628640ggml_v2_clblast_noavx2.o : otherarch/ggml_v2.c otherarch/ggml_v2.h
629- $(CC ) $(FASTCFLAGS ) $(SIMPLECFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
641+ $(CC ) $(FASTCFLAGS ) $(SIMPLERCFLAGS ) $(CLBLAST_FLAGS ) -c $< -o $@
630642
631643# extreme old version compat
632644ggml_v1.o : otherarch/ggml_v1.c otherarch/ggml_v1.h
@@ -747,7 +759,7 @@ ifdef CLBLAST_BUILD
747759koboldcpp_clblast : ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL ) $(OBJS )
748760 $(CLBLAST_BUILD )
749761ifdef NOAVX2_BUILD
750- koboldcpp_clblast_noavx2 : ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE ) $(OBJS )
762+ koboldcpp_clblast_noavx2 : ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER ) $(OBJS )
751763 $(CLBLAST_BUILD )
752764else
753765koboldcpp_clblast_noavx2 :
0 commit comments