Skip to content

Commit cf927d8

Browse files
committed
ggml-cpu: missing delta calc
Signed-off-by: Aaron Teo <[email protected]>
1 parent 3538930 commit cf927d8

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

ggml/src/ggml-cpu/arch/s390/quants.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -282,6 +282,8 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
282282
const int8x16_t v_k = vec_xl(0, kvalues_mxfp4);
283283
const uint8x16_t v_m = vec_splats((const uint8_t)0x0F);
284284

285+
float32x4_t v_acc = vec_splats(0.0f);
286+
285287
for (; ib < nb; ++ib) {
286288
const block_mxfp4 * GGML_RESTRICT x0 = &x[ib];
287289
const block_q8_0 * GGML_RESTRICT y0 = &y[ib];
@@ -297,11 +299,13 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
297299
const int8x16_t v_yh = vec_xl(QK8_0/2, y0->qs);
298300

299301
const int32x4_t v_xy = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_xl, v_yl), v_xh, v_yh);
302+
const float32x4_t v_xyf = vec_float(v_xy);
303+
const float32x4_t v_d = vec_splats(0, GGML_E8M0_TO_FP32_HALF(x0->e) * GGML_CPU_FP16_TO_FP32(y0->d));
300304

301-
const float scale = GGML_E8M0_TO_FP32(x0->e) * GGML_CPU_FP16_TO_FP32(y0->d);
302-
sumf += scale * vec_hsum_i32x4(v_xy);
305+
acc = vec_madd(v_xyf, v_d, acc);
303306
}
304307

308+
sumf = vec_hsum_f32x4(v_acc);
305309
*s = sumf;
306310
#else
307311
UNUSED(x);

0 commit comments

Comments
 (0)