|
1 | 1 | diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp |
2 | | -index 74c1c029..134b6daa 100644 |
| 2 | +index 74c1c02..604bd17 100644 |
3 | 3 | --- a/ggml/src/ggml-cpu/repack.cpp |
4 | 4 | +++ b/ggml/src/ggml-cpu/repack.cpp |
5 | | -@@ -1424,35 +1424,37 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons |
| 5 | +@@ -1424,6 +1424,7 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons |
6 | 6 | // instance for IQ4 |
7 | 7 | static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0; |
8 | 8 |
|
9 | | -- if (cur->type == GGML_TYPE_Q4_0) { |
10 | | -- if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { |
11 | | -- if (cur->ne[1] % 8 == 0) { |
12 | | -- return &q4_0_8x8_q8_0; |
13 | | -- } |
14 | | -- } |
15 | | -- if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { |
16 | | -- if (cur->ne[1] % 4 == 0) { |
17 | | -- return &q4_0_4x8_q8_0; |
18 | | -- } |
19 | | -- } |
20 | | -- if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { |
21 | | -- if (cur->ne[1] % 4 == 0) { |
22 | | -- return &q4_0_4x4_q8_0; |
23 | | -- } |
24 | | -- } |
25 | | -- } else if (cur->type == GGML_TYPE_Q4_K) { |
26 | | -- if (ggml_cpu_has_avx2()) { |
27 | | -- if (cur->ne[1] % 8 == 0) { |
28 | | -- return &q4_K_8x8_q8_K; |
29 | | -- } |
30 | | -- } |
31 | | -- } else if (cur->type == GGML_TYPE_IQ4_NL) { |
32 | | -- if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { |
33 | | -- if (cur->ne[1] % 4 == 0) { |
34 | | -- return &iq4_nl_4x4_q8_0; |
35 | | -- } |
36 | | -- } |
37 | | -- } |
38 | | -+ // if (cur->type == GGML_TYPE_Q4_0) { |
39 | | -+ // if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { |
40 | | -+ // if (cur->ne[1] % 8 == 0) { |
41 | | -+ // return &q4_0_8x8_q8_0; |
42 | | -+ // } |
43 | | -+ // } |
44 | | -+ // if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { |
45 | | -+ // if (cur->ne[1] % 4 == 0) { |
46 | | -+ // return &q4_0_4x8_q8_0; |
47 | | -+ // } |
48 | | -+ // } |
49 | | -+ // if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { |
50 | | -+ // if (cur->ne[1] % 4 == 0) { |
51 | | -+ // return &q4_0_4x4_q8_0; |
52 | | -+ // } |
53 | | -+ // } |
54 | | -+ // } else if (cur->type == GGML_TYPE_Q4_K) { |
55 | | -+ // if (ggml_cpu_has_avx2()) { |
56 | | -+ // if (cur->ne[1] % 8 == 0) { |
57 | | -+ // return &q4_K_8x8_q8_K; |
58 | | -+ // } |
59 | | -+ // } |
60 | | -+ // } else if (cur->type == GGML_TYPE_IQ4_NL) { |
61 | | -+ // if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { |
62 | | -+ // if (cur->ne[1] % 4 == 0) { |
63 | | -+ // return &iq4_nl_4x4_q8_0; |
64 | | -+ // } |
65 | | -+ // } |
66 | | -+ // } |
| 9 | ++#ifndef __MVS__ |
| 10 | + if (cur->type == GGML_TYPE_Q4_0) { |
| 11 | + if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { |
| 12 | + if (cur->ne[1] % 8 == 0) { |
| 13 | +@@ -1453,6 +1454,9 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons |
| 14 | + } |
| 15 | + } |
| 16 | + } |
| 17 | ++#endif |
67 | 18 | + |
68 | 19 | + GGML_LOG_DEBUG("REPACK DEBUG: %s: Repacking disabled for tensor '%s' (type %s)\n", __func__, ggml_get_name(cur) ? ggml_get_name(cur) : "unnamed", ggml_type_name(cur->type)); |
69 | 20 |
|
|
0 commit comments