@@ -5654,8 +5654,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
56545654
56555655 for (int i = 0 ; i < nb ; ++ i ) {
56565656
5657- const float d = y [i ].d * ( float ) x [i ].d ;
5658- const float dmin = - y [i ].d * ( float ) x [i ].dmin ;
5657+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
5658+ const float dmin = - y [i ].d * GGML_FP16_TO_FP32 ( x [i ].dmin ) ;
56595659
56605660 const uint8_t * restrict q2 = x [i ].qs ;
56615661 const int8_t * restrict q8 = y [i ].qs ;
@@ -5804,8 +5804,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
58045804
58055805 for (int i = 0 ; i < nb ; ++ i ) {
58065806
5807- const float d = y [i ].d * ( float ) x [i ].d ;
5808- const float dmin = - y [i ].d * ( float ) x [i ].dmin ;
5807+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
5808+ const float dmin = - y [i ].d * GGML_FP16_TO_FP32 ( x [i ].dmin ) ;
58095809
58105810 const uint8_t * restrict q2 = x [i ].qs ;
58115811 const int8_t * restrict q8 = y [i ].qs ;
@@ -6458,7 +6458,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
64586458
64596459 int32_t isum = -4 * (scales [0 ] * y [i ].bsums [0 ] + scales [2 ] * y [i ].bsums [1 ] + scales [1 ] * y [i ].bsums [2 ] + scales [3 ] * y [i ].bsums [3 ]);
64606460
6461- const float d = y [i ].d * ( float ) x [i ].d ;
6461+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
64626462
64636463 const uint8x16_t htmp = vcombine_u8 (hbits , vshr_n_u8 (hbits , 1 ));
64646464 q3h .val [0 ] = vandq_u8 (mh , vshlq_n_u8 (htmp , 2 ));
@@ -6660,7 +6660,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
66606660
66616661 int32_t isum = -4 * (scales [0 ] * y [i ].bsums [0 ] + scales [2 ] * y [i ].bsums [1 ] + scales [1 ] * y [i ].bsums [2 ] + scales [3 ] * y [i ].bsums [3 ]);
66626662
6663- const float d = y [i ].d * ( float ) x [i ].d ;
6663+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
66646664
66656665 vint32m1_t vzero = __riscv_vmv_v_x_i32m1 (0 , 1 );
66666666
@@ -7163,9 +7163,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
71637163 aux16 [1 ] = (a [0 ] >> 4 ) & 0x0f0f ;
71647164
71657165 const int32_t summi = scales [2 ] * (y [i ].bsums [0 ] + y [i ].bsums [1 ]) + scales [3 ] * (y [i ].bsums [2 ] + y [i ].bsums [3 ]);
7166- sum_mins += y [i ].d * ( float ) x [i ].d [1 ] * summi ;
7166+ sum_mins += y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d [1 ]) * summi ;
71677167
7168- const float d = y [i ].d * ( float ) x [i ].d [0 ];
7168+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d [0 ]) ;
71697169
71707170 const ggml_uint8x16x2_t q4bits = ggml_vld1q_u8_x2 (q4 );
71717171
@@ -7823,7 +7823,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
78237823
78247824 for (int i = 0 ; i < nb ; ++ i ) {
78257825
7826- const float d = y [i ].d * ( float ) x [i ].d ;
7826+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
78277827 const int8_t * sc = x [i ].scales ;
78287828
78297829 const uint8_t * restrict q5 = x [i ].qs ;
@@ -7965,7 +7965,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
79657965
79667966 for (int i = 0 ; i < nb ; ++ i ) {
79677967
7968- const float d = y [i ].d * ( float ) x [i ].d ;
7968+ const float d = y [i ].d * GGML_FP16_TO_FP32 ( x [i ].d ) ;
79697969 const int8_t * sc = x [i ].scales ;
79707970
79717971 const uint8_t * restrict q5 = x [i ].qs ;
@@ -8533,7 +8533,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * r
85338533
85348534 for (int i = 0 ; i < nb ; ++ i ) {
85358535
8536- const float d_all = ( float ) x [i ].d ;
8536+ const float d_all = GGML_FP16_TO_FP32 ( x [i ].d ) ;
85378537
85388538 const uint8_t * restrict q6 = x [i ].ql ;
85398539 const uint8_t * restrict qh = x [i ].qh ;
@@ -8704,7 +8704,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * r
87048704
87058705 for (int i = 0 ; i < nb ; ++ i ) {
87068706
8707- const float d_all = ( float ) x [i ].d ;
8707+ const float d_all = GGML_FP16_TO_FP32 ( x [i ].d ) ;
87088708
87098709 const uint8_t * restrict q6 = x [i ].ql ;
87108710 const uint8_t * restrict qh = x [i ].qh ;
@@ -9523,7 +9523,6 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
95239523 float sumf = 0 ;
95249524
95259525 for (int ib = 0 ; ib < nb ; ib += 2 ) {
9526-
95279526 q4bits .val [0 ] = vld1q_u8 (x [ib + 0 ].qs );
95289527 q4bits .val [1 ] = vld1q_u8 (x [ib + 1 ].qs );
95299528 q8b .val [0 ] = vld1q_s8 (y [ib + 0 ].qs );
@@ -9539,8 +9538,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
95399538 prod_1 = ggml_vdotq_s32 (ggml_vdotq_s32 (vdupq_n_s32 (0 ), q4b .val [0 ], q8b .val [0 ]), q4b .val [1 ], q8b .val [1 ]);
95409539 prod_2 = ggml_vdotq_s32 (ggml_vdotq_s32 (vdupq_n_s32 (0 ), q4b .val [2 ], q8b .val [2 ]), q4b .val [3 ], q8b .val [3 ]);
95419540
9542- sumf += (float )x [ib + 0 ].d * (float )y [ib + 0 ].d * vaddvq_s32 (prod_1 ) + (float )x [ib + 1 ].d * (float )y [ib + 1 ].d * vaddvq_s32 (prod_2 );
9543-
9541+ sumf +=
9542+ GGML_FP16_TO_FP32 (x [ib + 0 ].d ) * GGML_FP16_TO_FP32 (y [ib + 0 ].d ) * vaddvq_s32 (prod_1 ) +
9543+ GGML_FP16_TO_FP32 (x [ib + 1 ].d ) * GGML_FP16_TO_FP32 (y [ib + 1 ].d ) * vaddvq_s32 (prod_2 );
95449544 }
95459545
95469546 * s = sumf ;
0 commit comments