Skip to content

Commit 9cbb833

Browse files
committed
disable q8_1 quantization only when dst->ne[1] is 1
1 parent ac86e85 commit 9cbb833

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3372,7 +3372,8 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
33723372
ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_dequantize_mul_mat_vec, convert_src1_to_q8_1);
33733373
} else if (use_mul_mat_vec_q) {
33743374
// Skip the separate q8_1 quantization of src1 only for Q6_K with Intel builtins when dst->ne[1] == 1; in that case we use the gemv with fused quantization.
3375-
bool convert_src1_to_q8_1 = (ctx.opt_feature.can_use_intel_builtins && src0->type == GGML_TYPE_Q6_K) ? false : true;
3375+
bool convert_src1_to_q8_1 =
3376+
(ctx.opt_feature.can_use_intel_builtins && src0->type == GGML_TYPE_Q6_K && dst->ne[1] == 1) ? false : true;
33763377
opt_for_reorder(&ctx, src0, src1, dst, mul_mat_algo::MMVQ);
33773378
ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_mul_mat_vec_q, convert_src1_to_q8_1);
33783379
} else if (use_mul_mat_q) {

0 commit comments

Comments
 (0)