@@ -4190,15 +4190,18 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
41904190 sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
41914191#endif
41924192 for (; ib < nb; ++ib) {
4193- int sumi = 0;
4193+ int sumi0 = 0;
4194+ int sumi1 = 0;
41944195
41954196 for (int j = 0; j < qk/2; ++j) {
41964197 const int v0 = (x[ib].qs[j] & 0x0F) - 8;
41974198 const int v1 = (x[ib].qs[j] >> 4) - 8;
41984199
4199- sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4200+ sumi0 += (v0 * y[ib].qs[j]);
4201+ sumi1 += (v1 * y[ib].qs[j + qk/2]);
42004202 }
42014203
4204+ int sumi = sumi0 + sumi1;
42024205 sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
42034206 }
42044207
@@ -4474,15 +4477,18 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
44744477 sumf = hsum_float_8(acc) + summs;
44754478#endif
44764479 for (; ib < nb; ++ib) {
4477- int sumi = 0;
4480+ int sumi0 = 0;
4481+ int sumi1 = 0;
44784482
44794483 for (int j = 0; j < qk/2; ++j) {
44804484 const int v0 = (x[ib].qs[j] & 0x0F);
44814485 const int v1 = (x[ib].qs[j] >> 4);
44824486
4483- sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4487+ sumi0 += (v0 * y[ib].qs[j]);
4488+ sumi1 += (v1 * y[ib].qs[j + qk/2]);
44844489 }
44854490
4491+ int sumi = sumi0 + sumi1;
44864492 sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
44874493 }
44884494
@@ -4823,18 +4829,21 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
48234829 uint32_t qh;
48244830 memcpy(&qh, x[ib].qh, sizeof(qh));
48254831
4826- int sumi = 0;
4832+ int sumi0 = 0;
4833+ int sumi1 = 0;
48274834
48284835 for (int j = 0; j < qk/2; ++j) {
48294836 const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
48304837 const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
48314838
4832- const int32_t x0 = (( x[ib].qs[j] & 0x0F) | xh_0) - 16;
4833- const int32_t x1 = (( x[ib].qs[j] >> 4) | xh_1) - 16;
4839+ const int32_t x0 = (int8_t)((( x[ib].qs[j] & 0x0F) | xh_0) - 16) ;
4840+ const int32_t x1 = (int8_t)((( x[ib].qs[j] >> 4) | xh_1) - 16) ;
48344841
4835- sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
4842+ sumi0 += (x0 * y[ib].qs[j]);
4843+ sumi1 += (x1 * y[ib].qs[j + qk/2]);
48364844 }
48374845
4846+ int sumi = sumi0 + sumi1;
48384847 sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi;
48394848 }
48404849
@@ -5194,7 +5203,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
51945203 uint32_t qh;
51955204 memcpy(&qh, x[ib].qh, sizeof(qh));
51965205
5197- int sumi = 0;
5206+ int sumi0 = 0;
5207+ int sumi1 = 0;
51985208
51995209 for (int j = 0; j < qk/2; ++j) {
52005210 const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
@@ -5203,9 +5213,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
52035213 const int32_t x0 = (x[ib].qs[j] & 0xF) | xh_0;
52045214 const int32_t x1 = (x[ib].qs[j] >> 4) | xh_1;
52055215
5206- sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
5216+ sumi0 += (x0 * y[ib].qs[j]);
5217+ sumi1 += (x1 * y[ib].qs[j + qk/2]);
52075218 }
52085219
5220+ int sumi = sumi0 + sumi1;
52095221 sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
52105222 }
52115223
0 commit comments