@@ -921,15 +921,15 @@ void arm_correlate_q7(
921
921
const q7_t * pIn2 = pSrcB + (srcBLen - 1U ); /* InputB pointer */
922
922
q31_t sum ; /* Accumulator */
923
923
uint32_t i = 0U , j ; /* Loop counters */
924
- uint32_t inv = 0U ; /* Reverse order flag */
924
+ int32_t inc = 1 ; /* Destination address modifier */
925
925
uint32_t tot = 0U ; /* Length */
926
926
927
927
/* The algorithm implementation is based on the lengths of the inputs. */
928
928
/* srcB is always made to slide across srcA. */
929
929
/* So srcBLen is always considered as shorter or equal to srcALen */
930
930
/* But CORR(x, y) is reverse of CORR(y, x) */
931
931
/* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */
932
- /* and a varaible, inv is set to 1 */
932
+ /* and a variable, inc, is set to -1 */
933
933
/* If lengths are not equal then zero pad has to be done to make the two
934
934
* inputs of same length. But to improve the performance, we include zeroes
935
935
* in the output instead of zero padding either of the inputs */
@@ -968,8 +968,8 @@ void arm_correlate_q7(
968
968
srcALen = srcBLen ;
969
969
srcBLen = j ;
970
970
971
- /* Setting the reverse flag */
972
- inv = 1 ;
971
+ /* Filling destination in reverse order */
972
+ inc = - 1 ;
973
973
}
974
974
975
975
/* Loop to calculate convolution for output length number of times */
@@ -990,10 +990,8 @@ void arm_correlate_q7(
990
990
}
991
991
992
992
/* Store the output in the destination buffer */
993
- if (inv == 1 )
994
- * pDst -- = (q7_t ) __SSAT ((sum >> 7U ), 8U );
995
- else
996
- * pDst ++ = (q7_t ) __SSAT ((sum >> 7U ), 8U );
993
+ * pDst = (q7_t ) __SSAT ((sum >> 7U ), 8U );
994
+ pDst += inc ;
997
995
}
998
996
999
997
#endif /* #if !defined(ARM_MATH_CM0_FAMILY) */
0 commit comments