Skip to content

Commit 5835f58

Browse files
committed
Merge branch 'master' into huydt/mb
2 parents 51eda92 + 3ba0d84 commit 5835f58

File tree

12 files changed

+167
-102
lines changed

12 files changed

+167
-102
lines changed

convert_hf_to_gguf_update.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ class TOKENIZER_TYPE(IntEnum):
129129
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
130130
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
131131
{"name": "modern-bert", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/answerdotai/ModernBERT-base", },
132-
{"name": "arcee", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/arcee-ai/AFM-4.5B", }, # TODO confirm final URL
133132
]
134133

135134
# some models are known to be broken upstream, so we will skip them as exceptions

ggml/cmake/common.cmake

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ function(ggml_get_system_arch)
3636
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
3737
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
3838
set(GGML_SYSTEM_ARCH "x86" PARENT_SCOPE)
39-
elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR
40-
"${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
39+
# Quote the expansion: an empty CMAKE_SYSTEM_PROCESSOR would otherwise leave `MATCHES` without a left operand and abort configuration.
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc|power")
4140
set(GGML_SYSTEM_ARCH "PowerPC" PARENT_SCOPE)
4241
# Quote the expansion so an empty or variable-named CMAKE_SYSTEM_PROCESSOR is compared as a plain string.
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "loongarch64")
4342
set(GGML_SYSTEM_ARCH "loongarch64" PARENT_SCOPE)

ggml/src/CMakeLists.txt

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -311,18 +311,28 @@ if (GGML_CPU_ALL_VARIANTS)
311311
# MSVC doesn't support AMX
312312
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
313313
endif()
314-
elseif(GGML_SYSTEM_ARCH STREQUAL "ARM" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
315-
# Many of these features are optional so we build versions with popular
316-
# combinations and name the backends based on the version they were
317-
# first released with
318-
ggml_add_cpu_backend_variant(armv8.0_1)
319-
ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
320-
ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
321-
ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
322-
ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
323-
ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
324-
ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
325-
ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
314+
elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
315+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
316+
# Many of these features are optional so we build versions with popular
317+
# combinations and name the backends based on the version they were
318+
# first released with
319+
ggml_add_cpu_backend_variant(armv8.0_1)
320+
ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
321+
ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
322+
ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE)
323+
ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
324+
ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
325+
ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
326+
ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
327+
elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
328+
# Android-specific backends with SoC-compatible feature sets
329+
ggml_add_cpu_backend_variant(android_armv8.0_1)
330+
ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
331+
ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
332+
ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
333+
else()
334+
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
335+
endif()
326336
else()
327337
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
328338
endif()

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 38 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -158,48 +158,45 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
158158
if (GGML_CPU_ARM_ARCH)
159159
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
160160
elseif(GGML_CPU_ALL_VARIANTS)
161-
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
162-
# Begin with the lowest baseline
163-
set(ARM_MCPU "armv8-a")
164-
set(ARCH_TAGS "")
165-
set(ARCH_DEFINITIONS "")
166-
167-
# When a feature is selected, bump the MCPU to the first
168-
# version that supported it
169-
if (GGML_INTERNAL_DOTPROD)
170-
set(ARM_MCPU "armv8.2-a")
171-
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
172-
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
173-
endif()
174-
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
175-
set(ARM_MCPU "armv8.2-a")
176-
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
177-
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
178-
endif()
179-
if (GGML_INTERNAL_SVE)
180-
set(ARM_MCPU "armv8.2-a")
181-
set(ARCH_TAGS "${ARCH_TAGS}+sve")
182-
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
183-
endif()
184-
if (GGML_INTERNAL_MATMUL_INT8)
185-
set(ARM_MCPU "armv8.6-a")
186-
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
187-
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
188-
endif()
189-
if (GGML_INTERNAL_SVE2)
190-
set(ARM_MCPU "armv8.6-a")
191-
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
192-
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
193-
endif()
194-
if (GGML_INTERNAL_SME)
195-
set(ARM_MCPU "armv9.2-a")
196-
set(ARCH_TAGS "${ARCH_TAGS}+sme")
197-
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
198-
endif()
199-
200-
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
201-
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
161+
# Begin with the lowest baseline
162+
set(ARM_MCPU "armv8-a")
163+
set(ARCH_TAGS "")
164+
set(ARCH_DEFINITIONS "")
165+
166+
# When a feature is selected, bump the MCPU to the first
167+
# version that supported it
168+
if (GGML_INTERNAL_DOTPROD)
169+
set(ARM_MCPU "armv8.2-a")
170+
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
171+
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
172+
endif()
173+
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
174+
set(ARM_MCPU "armv8.2-a")
175+
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
176+
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
177+
endif()
178+
if (GGML_INTERNAL_SVE)
179+
set(ARM_MCPU "armv8.2-a")
180+
set(ARCH_TAGS "${ARCH_TAGS}+sve")
181+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
182+
endif()
183+
if (GGML_INTERNAL_MATMUL_INT8)
184+
set(ARM_MCPU "armv8.6-a")
185+
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
186+
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
187+
endif()
188+
if (GGML_INTERNAL_SVE2)
189+
set(ARM_MCPU "armv8.6-a")
190+
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
191+
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
192+
endif()
193+
if (GGML_INTERNAL_SME)
194+
set(ARM_MCPU "armv9.2-a")
195+
set(ARCH_TAGS "${ARCH_TAGS}+sme")
196+
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
202197
endif()
198+
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
199+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
203200
endif()
204201
endif()
205202

ggml/src/ggml-cpu/apple-fallback.h

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#pragma once
2+
3+
// Solve alias issue for Apple targets (currently PowerPC, x86, and ARM64).
4+
// Mach-O has a weak alias equivalent but no practical compiler support can
5+
// be found, so we need to do it manually.
6+
// ref: https://stackoverflow.com/questions/42757744
7+
//
8+
// This file is a complement to native implementations in the `arch` folder.
9+
// A kernel in quants.c or repack.cpp is either:
10+
// - implemented in the `arch` folder, or
11+
// - defined in this file to remove the `_generic` suffix
12+
13+
#if defined(GGML_CPU_GENERIC)
14+
// quants.c
15+
#define quantize_row_q8_0_generic quantize_row_q8_0
16+
#define quantize_row_q8_1_generic quantize_row_q8_1
17+
#define quantize_row_q8_K_generic quantize_row_q8_K
18+
#define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0
19+
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
20+
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
21+
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
22+
#define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
23+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
24+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
25+
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
26+
#define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K
27+
#define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K
28+
#define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K
29+
#define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K
30+
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
31+
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
32+
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
33+
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
34+
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
35+
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
36+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
37+
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
38+
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
39+
// repack.cpp
40+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
41+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
42+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
43+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
44+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
45+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
46+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
47+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
48+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
49+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
50+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
51+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
52+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
53+
#elif defined(__aarch64__) || defined(__arm__)
54+
// repack.cpp
55+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
56+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
57+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
58+
#elif defined(__x86_64__) || defined(__i386__)
59+
// repack.cpp
60+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
61+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
62+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
63+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
64+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
65+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
66+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
67+
#elif defined(__POWERPC__)
68+
// ref: https://github.com/ggml-org/llama.cpp/pull/14146#issuecomment-2972561679
69+
// quants.c
70+
#define quantize_row_q8_K_generic quantize_row_q8_K
71+
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
72+
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
73+
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
74+
// repack.cpp
75+
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
76+
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
77+
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
78+
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
79+
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
80+
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
81+
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
82+
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
83+
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
84+
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
85+
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
86+
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
87+
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
88+
#endif

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ void ggml_barrier(struct ggml_threadpool * tp);
509509

510510
#define GGML_DO_PRAGMA_(x) _Pragma (#x)
511511
#define GGML_DO_PRAGMA(x) GGML_DO_PRAGMA_(x)
512-
#if defined(GGML_CPU_GENERIC) || defined(__HIPCC__)
512+
#if defined(GGML_CPU_GENERIC) || defined(__HIPCC__) || defined(__APPLE__)
513513
// Note for Apple targets:
514514
// - clang: aliases are not supported on darwin
515515
// - all native kernels need to be implemented in both x86 and arm files

ggml/src/ggml-cpu/quants.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
#include "ggml-quants.h"
66
#include "quants.h"
77

8+
#if defined(__APPLE__)
9+
#include "apple-fallback.h"
10+
#endif
11+
812
#include <string.h>
913
#include <assert.h>
1014
#include <float.h>

ggml/src/ggml-cpu/quants.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -84,33 +84,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
8484
void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
8585
void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
8686

87-
#if defined(GGML_CPU_GENERIC)
88-
#define quantize_row_q8_0_generic quantize_row_q8_0
89-
#define quantize_row_q8_1_generic quantize_row_q8_1
90-
#define quantize_row_q8_K_generic quantize_row_q8_K
91-
#define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0
92-
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
93-
#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0
94-
#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1
95-
#define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0
96-
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
97-
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
98-
#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K
99-
#define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K
100-
#define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K
101-
#define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K
102-
#define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K
103-
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
104-
#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K
105-
#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K
106-
#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K
107-
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
108-
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
109-
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
110-
#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0
111-
#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K
112-
#endif
113-
11487
#ifdef __cplusplus
11588
}
11689
#endif

ggml/src/ggml-cpu/repack.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include "ggml-cpu-impl.h"
99
#include "traits.h"
1010

11+
#if defined(__APPLE__)
12+
#include "apple-fallback.h"
13+
#endif
14+
1115
#include <cmath>
1216
#include <cstring>
1317
#include <cassert>

ggml/src/ggml-cpu/repack.h

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ extern "C" {
6767
// Workaround for clang:
6868
// clang++ complains: ``error: call to 'ggml_gemm_q4_0_4x4_q8_0' is ambiguous''
6969
// repro: https://godbolt.org/z/oKdeWKonM (ICE), https://godbolt.org/z/1szq6P36v (ambiguous call)
70-
#if defined(GGML_CPU_CLANG_WORKAROUND) || !(defined(__GNUC__) && defined(__clang__)) || defined(__HIPCC__)
70+
#if defined(GGML_CPU_CLANG_WORKAROUND) || defined(__APPLE__) || !(defined(__GNUC__) && defined(__clang__)) || defined(__HIPCC__)
7171
void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
7272
void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
7373
void ggml_quantize_mat_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k);
@@ -98,22 +98,6 @@ void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,
9898
void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
9999
void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
100100

101-
#if defined(GGML_CPU_GENERIC)
102-
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
103-
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8
104-
#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
105-
#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
106-
#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
107-
#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
108-
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
109-
#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
110-
#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
111-
#define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
112-
#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
113-
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
114-
#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
115-
#endif
116-
117101
#if defined(__cplusplus)
118102
} // extern "C"
119103
#endif

0 commit comments

Comments
 (0)