@@ -343,9 +343,9 @@ static void vec_dot_q4x4x2_q8x4x2(const int n, float * restrict s, const void *
343343 }
344344
345345 // Reduce and convert into fp32
346- r0_sum = hvx_vec_qf32_reduce_sum ( r0_sum );
346+ r0_sum = hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
347347
348- hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
348+ hvx_vec_store_u (& s [0 ], 4 , r0_sum );
349349}
350350
351351static void vec_dot_q4x4x2_q8x4x2_rx2 (const int n ,
@@ -516,9 +516,9 @@ static void vec_dot_q8x4x2_q8x4x2(const int n, float * restrict s, const void *
516516 }
517517
518518 // Reduce and convert into fp32
519- r0_sum = hvx_vec_qf32_reduce_sum ( r0_sum );
519+ r0_sum = hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
520520
521- hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
521+ hvx_vec_store_u (& s [0 ], 4 , r0_sum );
522522}
523523
524524static void vec_dot_q8x4x2_q8x4x2_rx2 (const int n ,
@@ -722,9 +722,9 @@ static void vec_dot_mxfp4x4x2_q8x4x2(const int n,
722722 }
723723
724724 // Reduce and convert into fp32
725- r0_sum = hvx_vec_qf32_reduce_sum ( r0_sum );
725+ r0_sum = hvx_vec_fp32_reduce_sum ( Q6_Vsf_equals_Vqf32 ( r0_sum ) );
726726
727- hvx_vec_store_u (& s [0 ], 4 , Q6_Vsf_equals_Vqf32 ( r0_sum ) );
727+ hvx_vec_store_u (& s [0 ], 4 , r0_sum );
728728}
729729
730730static void vec_dot_mxfp4x4x2_q8x4x2_rx2 (const int n ,
0 commit comments