@@ -88,9 +88,9 @@ void hvx_mul_f32_opt(const uint8_t * restrict src0,
8888 const uint8_t * restrict src1 ,
8989 uint8_t * restrict dst ,
9090 const int num_elems ) {
91- htp_binary_ops_preamble
91+ htp_binary_ops_preamble ;
9292
93- for (int i = 0 ; i < step_of_4 ; i ++ ) {
93+ for (int i = 0 ; i < step_of_4 ; i ++ ) {
9494 HVX_Vector v1a = * (HVX_Vector * ) src0_curr ;
9595
9696 HVX_Vector v1b = * (HVX_Vector * ) src1_curr ;
@@ -129,6 +129,7 @@ void hvx_mul_f32_opt(const uint8_t * restrict src0,
129129
130130 dst_curr += 4 * VLEN ;
131131 }
132+
132133 for (int i = 0 ; i < step_of_2 ; i ++ ) {
133134 HVX_Vector v1a = * (HVX_Vector * ) src0_curr ;
134135
@@ -152,6 +153,7 @@ void hvx_mul_f32_opt(const uint8_t * restrict src0,
152153
153154 dst_curr += 2 * VLEN ;
154155 }
156+
155157 for (int i = 0 ; i < step_of_1 ; i ++ ) {
156158 HVX_Vector va = * (HVX_Vector * ) src0_curr ;
157159
@@ -167,6 +169,7 @@ void hvx_mul_f32_opt(const uint8_t * restrict src0,
167169
168170 dst_curr += VLEN ;
169171 }
172+
170173 if (remaining > 0 ) {
171174 HVX_Vector v = Q6_Vqf32_vmpy_VsfVsf (* (HVX_Vector * ) src0_curr , * (HVX_Vector * ) src1_curr );
172175 hvx_vec_store_u ((void * ) dst_curr , remaining * SIZEOF_FP32 , Q6_Vsf_equals_Vqf32 (v ));
@@ -297,9 +300,9 @@ void hvx_add_f32_opt(const uint8_t * restrict src0,
297300 const uint8_t * restrict src1 ,
298301 uint8_t * restrict dst ,
299302 const int num_elems ) {
300- htp_binary_ops_preamble
303+ htp_binary_ops_preamble ;
301304
302- for (int i = 0 ; i < step_of_4 ; i ++ ) {
305+ for (int i = 0 ; i < step_of_4 ; i ++ ) {
303306 HVX_Vector v1a = * (HVX_Vector * ) src0_curr ;
304307
305308 HVX_Vector v1b = * (HVX_Vector * ) src1_curr ;
@@ -539,9 +542,9 @@ void hvx_sub_f32_opt(const uint8_t * restrict src0,
539542 const uint8_t * restrict src1 ,
540543 uint8_t * restrict dst ,
541544 const int num_elems ) {
542- htp_binary_ops_preamble
545+ htp_binary_ops_preamble ;
543546
544- for (int i = 0 ; i < step_of_4 ; i ++ ) {
547+ for (int i = 0 ; i < step_of_4 ; i ++ ) {
545548 HVX_Vector v1a = * (HVX_Vector * ) src0_curr ;
546549
547550 HVX_Vector v1b = * (HVX_Vector * ) src1_curr ;
0 commit comments