Skip to content

Commit 0e8b8ec

Browse files
Revert "Removed some gcc workarounds for Helium that are no more needed."
This reverts commit e71a2ba. Signed-off-by: Ryan McClelland <[email protected]>
1 parent 893dba3 commit 0e8b8ec

File tree

7 files changed: 47 additions and 13 deletions.

Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@
4545
@param[out] pDst points to the block of output data
4646
@param[in] blockSize number of samples to process
4747
*/
48-
#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
48+
49+
/* Warn only when GCC is the reason the scalar path is chosen: same condition as the Helium guard that follows, with the ARM_DSP_BUILT_WITH_GCC test inverted. */
#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
50+
#pragma GCC warning "Scalar version of arm_biquad_cascade_stereo_df2T_f16 built. Helium version has build issues with gcc."
51+
#endif
52+
53+
#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
4954
ARM_DSP_ATTRIBUTE void arm_biquad_cascade_stereo_df2T_f16(
5055
const arm_biquad_cascade_stereo_df2T_instance_f16 * S,
5156
const float16_t * pSrc,

Source/FilteringFunctions/arm_correlate_q7.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,11 @@
5656
Refer to \ref arm_correlate_opt_q7() for a faster implementation of this function.
5757
*/
5858

59-
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
59+
/* The q7 Helium kernel is selected by ARM_MATH_MVEI (integer MVE), not ARM_MATH_MVE_FLOAT16; test the same macro so integer-only MVE builds with GCC are warned about the scalar fallback too. */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
60+
#pragma GCC warning "Scalar version of arm_correlate_q7 built. Helium version has build issues with gcc."
61+
#endif
62+
63+
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
6064
#include "arm_helium_utils.h"
6165

6266
#include "arm_vec_filtering.h"

Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@
5151
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
5252
*/
5353

54-
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
54+
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
55+
#pragma GCC warning "Scalar version of arm_mat_cmplx_mult_f16 built. Helium version has build issues with gcc."
56+
#endif
57+
58+
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
5559

5660
#include "arm_helium_utils.h"
5761

Source/StatisticsFunctions/arm_absmax_q7.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,12 @@
4545
@param[out] pIndex index of maximum value returned here
4646
*/
4747

48-
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
48+
/* The q7 Helium kernel is selected by ARM_MATH_MVEI (integer MVE), not ARM_MATH_MVE_FLOAT16; test the same macro so integer-only MVE builds with GCC are warned about the scalar fallback too. */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
49+
#pragma GCC warning "Scalar version of arm_absmax_q7 built. Helium version has build issues with gcc."
50+
#endif
51+
52+
53+
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
4954

5055
#include <stdint.h>
5156
#include "arm_helium_utils.h"

Source/TransformFunctions/arm_rfft_q15.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,11 @@ ARM_DSP_ATTRIBUTE void arm_rfft_q15(
159159
#include "arm_helium_utils.h"
160160
#include "arm_vec_fft.h"
161161

162+
#if defined(ARM_DSP_BUILT_WITH_GCC)
163+
#define MVE_CMPLX_MULT_FX_AxB_S16(A,B) vqdmladhxq_s16(vqdmlsdhq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)
164+
#define MVE_CMPLX_MULT_FX_AxConjB_S16(A,B) vqdmladhq_s16(vqdmlsdhxq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)
165+
166+
#endif
162167

163168
ARM_DSP_ATTRIBUTE void arm_split_rfft_q15(
164169
q15_t * pSrc,
@@ -200,9 +205,13 @@ ARM_DSP_ATTRIBUTE void arm_split_rfft_q15(
200205
q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef);
201206
q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
202207

203-
208+
#if defined(ARM_DSP_BUILT_WITH_GCC)
209+
q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
210+
MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
211+
#else
204212
q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t),
205213
MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t));
214+
#endif
206215
vst1q_s16(pOut1, out);
207216
pOut1 += 8;
208217

Source/TransformFunctions/arm_rfft_q31.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,12 @@ ARM_DSP_ATTRIBUTE void arm_rfft_q31(
157157
#include "arm_helium_utils.h"
158158
#include "arm_vec_fft.h"
159159

160+
#if defined(ARM_DSP_BUILT_WITH_GCC)
161+
162+
#define MVE_CMPLX_MULT_FX_AxB_S32(A,B) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
163+
#define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
164+
165+
#endif
160166

161167
ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
162168
q31_t *pSrc,
@@ -193,9 +199,12 @@ ARM_DSP_ATTRIBUTE void arm_split_rfft_q31(
193199
q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
194200
q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
195201
q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
196-
202+
#if defined(ARM_DSP_BUILT_WITH_GCC)
203+
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
204+
#else
197205
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t),
198206
MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t));
207+
#endif
199208
vst1q(pOut1, out);
200209
pOut1 += 4;
201210

@@ -348,9 +357,13 @@ ARM_DSP_ATTRIBUTE void arm_split_rifft_q31(
348357
q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
349358

350359
/* can we avoid the conjugate here ? */
360+
#if defined(ARM_DSP_BUILT_WITH_GCC)
361+
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
362+
vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
363+
#else
351364
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t),
352365
vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t)));
353-
366+
#endif
354367
vst1q_s32(pDst, out);
355368
pDst += 4;
356369

dsppp/Include/dsppp/arch_detection.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,6 @@ extern "C"
1616
#elif defined ( __APPLE_CC__ )
1717
#pragma GCC diagnostic ignored "-Wold-style-cast"
1818

19-
#elif defined(__clang__)
20-
#pragma GCC diagnostic push
21-
#pragma GCC diagnostic ignored "-Wsign-conversion"
22-
#pragma GCC diagnostic ignored "-Wconversion"
23-
#pragma GCC diagnostic ignored "-Wunused-parameter"
24-
2519
#elif defined ( __GNUC__ )
2620
#pragma GCC diagnostic push
2721
#pragma GCC diagnostic ignored "-Wsign-conversion"

0 commit comments

Comments
 (0)