Skip to content

Commit 1f7f498

Browse files
committed
Enabled q4_K_8x8_q8_K path on ARM
1 parent 8cf6b42 commit 1f7f498

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
5555
#define ggml_gemv_iq4_nl_8x8_q8_0_generic ggml_gemv_iq4_nl_8x8_q8_0
5656
#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
57-
#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
57+
// #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
5858
#define ggml_gemm_iq4_nl_8x8_q8_0_generic ggml_gemm_iq4_nl_8x8_q8_0
5959
#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
6060
#elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)

ggml/src/ggml-cpu/arch/arm/repack.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,3 +1889,29 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const
18891889
#endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
18901890
ggml_gemm_iq4_nl_4x4_q8_0_generic(n, s, bs, vx, vy, nr, nc);
18911891
}
1892+
1893+
void ggml_gemv_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1894+
GGML_ABORT("Expected in GEMV");
1895+
}
1896+
1897+
void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
1898+
const int qk = QK8_0;
1899+
const int nb = n / qk;
1900+
const int ncols_interleaved = 8;
1901+
const int blocklen = 8;
1902+
1903+
assert (n % qk == 0);
1904+
assert (nc % ncols_interleaved == 0);
1905+
1906+
UNUSED(s);
1907+
UNUSED(bs);
1908+
UNUSED(vx);
1909+
UNUSED(vy);
1910+
UNUSED(nr);
1911+
UNUSED(nc);
1912+
UNUSED(nb);
1913+
UNUSED(ncols_interleaved);
1914+
UNUSED(blocklen);
1915+
1916+
GGML_ABORT("Expected in GEMM");
1917+
}

ggml/src/ggml-cpu/repack.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,6 +1846,8 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
18461846
if (cur->ne[1] % 8 == 0) {
18471847
return &q4_K_8x8_q8_K;
18481848
}
1849+
} else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
1850+
return &q4_K_8x8_q8_K;
18491851
}
18501852
} else if (cur->type == GGML_TYPE_Q2_K) {
18511853
if (ggml_cpu_has_avx512()) {

0 commit comments

Comments
 (0)