Skip to content

Commit 4f85c33

Browse files
committed
ggml-cpu: add unroll to boost perf
Signed-off-by: Aaron Teo <[email protected]>
1 parent 5fb1bb9 commit 4f85c33

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

ggml/src/ggml-cpu/arch/s390/quants.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
284 284
285 285   float32x4_t v_acc = vec_splats(0.0f);
286 286
    287 + #pragma GCC unroll 8
287 288   for (; ib + 1 < nb; ib += 2) {
288 289       const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
289 290       const block_mxfp4 * GGML_RESTRICT x1 = &x[ib + 1];
@@ -321,6 +322,7 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
321 322       v_acc = vec_madd(v_xy1f, v_d1, v_acc);
322 323   }
323 324
    325 + #pragma GCC unroll 8
324 326   for (; ib < nb; ++ib) {
325 327       const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
326 328       const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];

0 commit comments

Comments
 (0)