@@ -525,7 +525,7 @@ void ggml_gemv_q4_K_8x8_q8_K(int                        n,
525525    UNUSED (ncols_interleaved);
526526    UNUSED (blocklen);
527527
528- #if  ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) 
528+ #if  ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
529529    constexpr  int  col_pairs = ncols_interleaved / 2 ;
530530    const  uint8x16_t  m4b = vdupq_n_u8 (0x0f );
531531
@@ -596,15 +596,15 @@ void ggml_gemv_q4_K_8x8_q8_K(int                        n,
596596                    uint8x16_t  q4_qs_cp_2 = vld1q_u8 (q4_base + 16  * cp + 128 );
597597                    uint8x16_t  q4_qs_cp_3 = vld1q_u8 (q4_base + 16  * cp + 192 );
598598
599-                     acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_0, m4b)), q8_qs[0 ]); //  0 .. 7
600-                     acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_1, m4b)), q8_qs[1 ]); //  8 ..15
601-                     acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_2, m4b)), q8_qs[2 ]); //  16..23
602-                     acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_3, m4b)), q8_qs[3 ]); //  24..31
599+                     acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_0, m4b)), q8_qs[0 ]); //  0 .. 7
600+                     acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_1, m4b)), q8_qs[1 ]); //  8 ..15
601+                     acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_2, m4b)), q8_qs[2 ]); //  16..23
602+                     acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_3, m4b)), q8_qs[3 ]); //  24..31
603603
604-                     acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_0, 4 )), q8_qs[4 ]); //  32..39
605-                     acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_1, 4 )), q8_qs[5 ]); //  40..47
606-                     acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_2, 4 )), q8_qs[6 ]); //  48..55
607-                     acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_3, 4 )), q8_qs[7 ]); //  56..63
604+                     acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_0, 4 )), q8_qs[4 ]); //  32..39
605+                     acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_1, 4 )), q8_qs[5 ]); //  40..47
606+                     acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_2, 4 )), q8_qs[6 ]); //  48..55
607+                     acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_3, 4 )), q8_qs[7 ]); //  56..63
608608                }
609609
610610
@@ -652,7 +652,7 @@ void ggml_gemv_q4_K_8x8_q8_K(int                        n,
652652        vst1q_f32 (s + base + 4 , acc_f32[1 ]);
653653    }  //  for x
654654    return ;
655- #endif 
655+ #endif   //  #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) 
656656    ggml_gemv_q4_K_8x8_q8_K_generic (n, s, bs, vx, vy, nr, nc);
657657}
658658
0 commit comments