File tree Expand file tree Collapse file tree 2 files changed +6
-6
lines changed Expand file tree Collapse file tree 2 files changed +6
-6
lines changed Original file line number Diff line number Diff line change @@ -5642,11 +5642,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
56425642 }
56435643 }
56445644 } else if (cur->type == GGML_TYPE_Q4_K) {
5645- if (ggml_cpu_has_avx2 ()) {
5646- if (cur->ne [1 ] % 8 == 0 ) {
5647- return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
5648- }
5649- }
5645+ // if (ggml_cpu_has_avx2()) { // use the regular AVX2 handling without repacking; repacking causes a massive slowdown with a GPU
5646+ // if (cur->ne[1] % 8 == 0) {
5647+ // return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
5648+ // }
5649+ // }
56505650 } else if (cur->type == GGML_TYPE_IQ4_NL) {
56515651 if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod ()) {
56525652 if (cur->ne [1 ] % 4 == 0 ) {
Original file line number Diff line number Diff line change @@ -286,7 +286,7 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
286286
287287 // add extra buffer types, only if no GPU device is present
288288 // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
289- if (!has_gpu_device ) {
289+ if (true ) { // kcpp needs this to be true, otherwise 4_0_4_4 quants will break. AVX2 repacking doesn't affect us because we disabled it
290290 auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
291291 auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
292292 auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
You can’t perform that action at this time.
0 commit comments