Skip to content

Commit 932f28e

Browse files
authored
Merge branch 'ggerganov:master' into patch-4
2 parents 6e9d976 + 74d73dc commit 932f28e

File tree

4 files changed

+31
-14
lines changed

4 files changed

+31
-14
lines changed

Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ ifdef LLAMA_SERVER_SSL
359359
MK_LDFLAGS += -lssl -lcrypto
360360
endif
361361

362+
ifndef GGML_NO_CPU_AARCH64
363+
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
364+
endif
365+
362366
# warnings
363367
WARN_FLAGS = \
364368
-Wall \
@@ -940,10 +944,6 @@ ggml/src/ggml-cuda/%.o: \
940944
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
941945
endif # GGML_MUSA
942946

943-
ifndef GGML_NO_CPU_AARCH64
944-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
945-
endif
946-
947947
ifdef GGML_METAL
948948
MK_CPPFLAGS += -DGGML_USE_METAL
949949
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,23 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
143143
if (GGML_AVX512_VBMI)
144144
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
145145
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
146+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
147+
list(APPEND ARCH_FLAGS -mavx512vbmi)
148+
endif()
146149
endif()
147150
if (GGML_AVX512_VNNI)
148151
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
149152
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
153+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
154+
list(APPEND ARCH_FLAGS -mavx512vnni)
155+
endif()
150156
endif()
151157
if (GGML_AVX512_BF16)
152158
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
153159
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
160+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
161+
list(APPEND ARCH_FLAGS -mavx512bf16)
162+
endif()
154163
endif()
155164
if (GGML_AMX_TILE)
156165
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)

ggml/src/ggml.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@
4949

5050
#define UNUSED GGML_UNUSED
5151

52+
#if defined(_MSC_VER)
53+
#define m512bh(p) p
54+
#define m512i(p) p
55+
#else
56+
#define m512bh(p) (__m512bh)(p)
57+
#define m512i(p) (__m512i)(p)
58+
#endif
59+
5260
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
5361
float ggml_table_f32_f16[1 << 16];
5462

scripts/compare-llama-bench.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,22 @@
1919

2020
# Properties by which to differentiate results per commit:
2121
KEY_PROPERTIES = [
22-
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
23-
"blas", "model_filename", "model_type", "n_batch", "n_ubatch", "embeddings", "n_threads",
24-
"type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
22+
"cpu_info", "gpu_info", "backends", "n_gpu_layers", "model_filename", "model_type", "n_batch", "n_ubatch",
23+
"embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload",
24+
"split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
2525
]
2626

2727
# Properties that are boolean and are converted to Yes/No for the table:
28-
BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
28+
BOOL_PROPERTIES = ["embeddings", "cpu_strict", "use_mmap", "no_kv_offload", "flash_attn"]
2929

3030
# Header names for the table:
3131
PRETTY_NAMES = {
32-
"cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
33-
"gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
34-
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
35-
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
36-
"main_gpu": "Main GPU", "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split",
37-
"use_mmap": "Use mmap", "embeddings": "Embeddings",
32+
"cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers",
33+
"model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]",
34+
"model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
35+
"embeddings": "Embeddings", "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll",
36+
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "split_mode": "Split mode", "main_gpu": "Main GPU",
37+
"no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split", "use_mmap": "Use mmap",
3838
}
3939

4040
DEFAULT_SHOW = ["model_type"] # Always show these properties by default.

0 commit comments

Comments
 (0)