File tree Expand file tree Collapse file tree 3 files changed +12
-7
lines changed Expand file tree Collapse file tree 3 files changed +12
-7
lines changed Original file line number Diff line number Diff line change @@ -88,6 +88,7 @@ endif()
8888# 3rd party libs
8989option (LLAMA_ACCELERATE "llama: enable Accelerate framework" ON )
9090option (LLAMA_BLAS "llama: use BLAS" OFF )
91+ option (LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON )
9192set (LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor" )
9293option (LLAMA_CUDA "llama: use CUDA" OFF )
9394option (LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF )
@@ -286,6 +287,7 @@ if (LLAMA_METAL)
286287 ${METALKIT_FRAMEWORK}
287288 )
288289endif ()
290+
289291if (LLAMA_BLAS)
290292 if (LLAMA_STATIC)
291293 set (BLA_STATIC ON )
@@ -368,6 +370,10 @@ if (LLAMA_BLAS)
368370 endif ()
369371endif ()
370372
373+ if (LLAMA_LLAMAFILE)
374+ add_compile_definitions (GGML_USE_LLAMAFILE)
375+ endif ()
376+
371377if (LLAMA_QKK_64)
372378 add_compile_definitions (GGML_QKK_64)
373379endif ()
Original file line number Diff line number Diff line change @@ -222,6 +222,8 @@ endif # LLAMA_DISABLE_LOGS
222222# disable ggml.c's use of sgemm.cpp
223223ifdef LLAMA_NO_LLAMAFILE
224224 MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225+ else
226+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
225227endif
226228
227229# warnings
Original file line number Diff line number Diff line change 3333#include <unistd.h>
3434#endif
3535
36- #ifndef GGML_USE_LLAMAFILE
3736#ifdef __ARM_FEATURE_MATMUL_INT8
38- #define GGML_USE_LLAMAFILE 0
39- #else
40- #define GGML_USE_LLAMAFILE 1
41- #endif
37+ #undef GGML_USE_LLAMAFILE
4238#endif
4339
4440#if defined(_MSC_VER)
@@ -10879,8 +10875,9 @@ UseGgmlGemm1:;
1087910875 if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
1088010876 (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
1088110877 nb01/ggml_type_size(src0->type),
10882- (const char *)wdata + (nb12/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i12 +
10883- nb13/ggml_type_size(src1->type)*ggml_type_size(vec_dot_type)*i13),
10878+ (const char *)wdata + ggml_row_size(vec_dot_type,
10879+ nb12/ggml_type_size(src1->type)*i12 +
10880+ nb13/ggml_type_size(src1->type)*i13),
1088410881 row_size/ggml_type_size(vec_dot_type),
1088510882 (char *)dst->data + i12*nb2 + i13*nb3,
1088610883 nb1/ggml_type_size(dst->type),
You can’t perform that action at this time.
0 commit comments