Skip to content

Commit ac86e85

Browse files
committed
bugfixes
1 parent 472c495 commit ac86e85

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3371,7 +3371,8 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
33713371
opt_for_reorder(&ctx, src0, src1, dst, mul_mat_algo::DMMV);
33723372
ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_dequantize_mul_mat_vec, convert_src1_to_q8_1);
33733373
} else if (use_mul_mat_vec_q) {
3374-
bool convert_src1_to_q8_1 = ctx.opt_feature.can_use_intel_builtins ? false : true;
3374+
// Do not pre-quantize the input in the Q6_K case: there we use the GEMV kernel with fused quantization.
3375+
bool convert_src1_to_q8_1 = (ctx.opt_feature.can_use_intel_builtins && src0->type == GGML_TYPE_Q6_K) ? false : true;
33753376
opt_for_reorder(&ctx, src0, src1, dst, mul_mat_algo::MMVQ);
33763377
ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_mul_mat_vec_q, convert_src1_to_q8_1);
33773378
} else if (use_mul_mat_q) {

ggml/src/ggml-sycl/q6_k_tiled_gemv.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
#include <sys/types.h>
55

66
#include <cstdint>
7-
#include <tuple>
8-
97
#include <sycl/sycl.hpp>
8+
#include <tuple>
109

1110
#include "builtins.hpp"
1211
#include "cacheopts.hpp"
@@ -19,7 +18,7 @@ __attribute__((always_inline)) inline std::tuple<int, float> quantize_and_pack_i
1918
int packed_quants = 0;
2019
#pragma unroll(4)
2120
for (int i = 0; i < 4; i++) {
22-
amax = sycl::fmax(amax, sycl::fabs(loaded_fp32_vals[i]));
21+
amax = sycl::fmax(amax, sycl::fabs(loaded_fp32_vals[i]));
2322
}
2423

2524
float amax_value_to_contribute = wi_id_in_sg > 7 ? 0 : amax;
@@ -143,7 +142,8 @@ __attribute__((always_inline)) inline void q6k_tiled_gemv(const int8_t * q6_k_l,
143142
(intptr_t) (q6_k_h), q6_k_h_width, m - 1, q6_k_h_width,
144143
vector_types::uint2{ (uint) (q6_h_w_coord_start + j / 4), (uint) h_coord });
145144

146-
auto loaded_fp32_vals = *reinterpret_cast<const sycl::vec<float, 4> *>(q8_1 + element_width_offset + j);
145+
auto loaded_fp32_vals =
146+
*reinterpret_cast<const sycl::vec<float, 4> *>(q8_1 + element_width_offset + j + wi_id_in_sg * 4);
147147
// int packed_q8_1_vals = __builtin_IB_subgroup_block_read_flat_u8_m1k64v1(
148148
// (intptr_t) (q8_1), q8_1_width, 0, q8_1_width,
149149
// vector_types::uint2{ (uint) (element_width_offset + j), (uint) 0 });

0 commit comments

Comments
 (0)