Skip to content

Commit ae670db

Browse files
committed
no repacking for avx2 for kcpp because it breaks 4_0_4_4 quants
1 parent 7030ebf commit ae670db

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5642,11 +5642,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
56425642
}
56435643
}
56445644
} else if (cur->type == GGML_TYPE_Q4_K) {
5645-
if (ggml_cpu_has_avx2()) {
5646-
if (cur->ne[1] % 8 == 0) {
5647-
return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
5648-
}
5649-
}
5645+
// if (ggml_cpu_has_avx2()) { // kcpp: use the regular AVX2 handling without repacking; repacking here causes a massive slowdown when a GPU is in use
5646+
// if (cur->ne[1] % 8 == 0) {
5647+
// return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
5648+
// }
5649+
// }
56505650
} else if (cur->type == GGML_TYPE_IQ4_NL) {
56515651
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
56525652
if (cur->ne[1] % 4 == 0) {

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
286286

287287
// add extra buffer types, only if no GPU device is present
288288
// ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
289-
if (!has_gpu_device) {
289+
if (true) { // kcpp needs this to be true, otherwise 4_0_4_4 quants will break. AVX2 repacking doesn't affect us because we disabled it
290290
auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
291291
auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
292292
auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)

0 commit comments

Comments
 (0)