@@ -284,45 +284,6 @@ void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
284284
285285 float32x4_t v_acc = vec_splats (0.0f );
286286
287- #pragma GCC unroll 8
288- for (; ib + 1 < nb ; ib += 2 ) {
289- const block_mxfp4 * GGML_RESTRICT x0 = & x [ib + 0 ];
290- const block_mxfp4 * GGML_RESTRICT x1 = & x [ib + 1 ];
291- const block_q8_0 * GGML_RESTRICT y0 = & y [ib + 0 ];
292- const block_q8_0 * GGML_RESTRICT y1 = & y [ib + 1 ];
293-
294- const uint8x16_t v_x0 = vec_xl (0 , x0 -> qs );
295- const uint8x16_t v_x1 = vec_xl (0 , x1 -> qs );
296-
297- int8x16_t v_x0l = (int8x16_t )vec_and (v_x0 , v_m );
298- int8x16_t v_x0h = (int8x16_t )vec_sr (v_x0 , 4 );
299- int8x16_t v_x1l = (int8x16_t )vec_and (v_x1 , v_m );
300- int8x16_t v_x1h = (int8x16_t )vec_sr (v_x1 , 4 );
301-
302- v_x0l = vec_perm (v_k , v_k , (uchar8x16_t )v_x0l );
303- v_x0h = vec_perm (v_k , v_k , (uchar8x16_t )v_x0h );
304- v_x1l = vec_perm (v_k , v_k , (uchar8x16_t )v_x1l );
305- v_x1h = vec_perm (v_k , v_k , (uchar8x16_t )v_x1h );
306-
307- const int8x16_t v_y0l = vec_xl (0 , y0 -> qs );
308- const int8x16_t v_y0h = vec_xl (QK8_0 /2 , y0 -> qs );
309- const int8x16_t v_y1l = vec_xl (0 , y1 -> qs );
310- const int8x16_t v_y1h = vec_xl (QK8_0 /2 , y1 -> qs );
311-
312- const int32x4_t v_xy0 = ggml_vec_dot (ggml_vec_dot (vec_splats (0 ), v_x0l , v_y0l ), v_x0h , v_y0h );
313- const int32x4_t v_xy1 = ggml_vec_dot (ggml_vec_dot (vec_splats (0 ), v_x1l , v_y1l ), v_x1h , v_y1h );
314-
315- const float32x4_t v_xy0f = vec_float (v_xy0 );
316- const float32x4_t v_xy1f = vec_float (v_xy1 );
317-
318- const float32x4_t v_d0 = vec_splats (GGML_E8M0_TO_FP32_HALF (x0 -> e ) * GGML_CPU_FP16_TO_FP32 (y0 -> d ));
319- const float32x4_t v_d1 = vec_splats (GGML_E8M0_TO_FP32_HALF (x1 -> e ) * GGML_CPU_FP16_TO_FP32 (y1 -> d ));
320-
321- v_acc = vec_madd (v_xy0f , v_d0 , v_acc );
322- v_acc = vec_madd (v_xy1f , v_d1 , v_acc );
323- }
324-
325- #pragma GCC unroll 8
326287 for (; ib < nb ; ++ ib ) {
327288 const block_mxfp4 * GGML_RESTRICT x0 = & x [ib + 0 ];
328289 const block_q8_0 * GGML_RESTRICT y0 = & y [ib + 0 ];
0 commit comments