// Toolchain detection for the SIMD kernels in this file.
//
//   SIMD      defined (no value) when one of the supported x86 toolchains
//             below is detected.
//   SIMD_FMA  1 to compile the code guarded by `#if SIMD_FMA`, 0 to omit it.
//   SIMD_AVX  1 to compile the code guarded by `#if SIMD_AVX`, 0 to omit it.
//
// NOTE(review): TARGET_OS_OSX is not defined in this block; presumably it
// comes from <TargetConditionals.h> included earlier -- confirm. If it is
// undefined the preprocessor treats it as 0 and the Apple branch is skipped.
#if defined (__APPLE__) && defined (__x86_64__) && TARGET_OS_OSX
# include <x86intrin.h>
# define SIMD
# define SIMD_FMA 0 // Disable fma on Apple targets
# define SIMD_AVX 0
#elif defined (_MSC_VER)
# include <pmmintrin.h>
# include <immintrin.h>
# include <intrin.h>
# define SIMD
# define SIMD_FMA 1
# define SIMD_AVX 1
#else
// Neither supported toolchain matched: keep both feature switches defined so
// that `#if SIMD_FMA` / `#if SIMD_AVX` never test an undefined macro (which
// would silently evaluate to 0 and trip -Wundef).
# ifndef SIMD_FMA
#  define SIMD_FMA 0
# endif
# ifndef SIMD_AVX
#  define SIMD_AVX 0
# endif
#endif
3842
3943AEOLUS_NAMESPACE_BEGIN
@@ -358,6 +362,7 @@ namespace sse {
358362 }
359363 }
360364
365+ #if SIMD_FMA
361366 namespace fma {
362367
363368 void mul_const_add (float * out, const float * in, const float k, size_t size)
@@ -507,11 +512,12 @@ namespace sse {
507512 }
508513
509514 } // namespace fma
515+ #endif // SIMD_FMA
510516
511517} // namespace sse
512518
513519namespace avx {
514-
520+ # if SIMD_AVX
515521 void add (float * out, const float * in, size_t size)
516522 {
517523 assert ((size & 0x7 ) == 0 );
@@ -758,7 +764,7 @@ namespace avx {
758764 }
759765
760766 namespace fma {
761-
767+ # if SIMD_FMA
762768 void mul_const_add (float * out, const float * in, const float k, size_t size)
763769 {
764770 assert ((size & 0x7 ) == 0 );
@@ -947,9 +953,9 @@ namespace avx {
947953
948954 _mm256_zeroupper ();
949955 }
950-
956+ # endif // SIMD_FMA
951957 } // namespace fma
952-
958+ # endif // SIMD_AVX
953959} // namespace avx
954960
955961#endif // SIMD
@@ -983,6 +989,7 @@ static const bool simd_map = []() -> bool {
983989 simd::complex_mul_conj = &sse::complex_mul_conj;
984990 simd::fft_step = &sse::fft_step;
985991
992+ #if SIMD_FMA
986993 if (cpu.fma ) {
987994 simd::mul_const_add = &sse::fma::mul_const_add;
988995 simd::mul_reduce = &sse::fma::mul_reduce;
@@ -991,8 +998,10 @@ static const bool simd_map = []() -> bool {
991998 simd::complex_mul_conj = &sse::fma::complex_mul_conj;
992999 simd::fft_step = &sse::fma::fft_step;
9931000 }
1001+ #endif // SIMD_FMA
9941002 }
9951003
1004+ #if SIMD_AVX
9961005 if (cpu.avx ) {
9971006 simd::add = &avx::add;
9981007 simd::mul_const_add = &avx::mul_const_add;
@@ -1004,6 +1013,7 @@ static const bool simd_map = []() -> bool {
10041013 simd::complex_mul_conj = &avx::complex_mul_conj;
10051014 simd::fft_step = &avx::fft_step;
10061015
1016+ #if SIMD_FMA
10071017 if (cpu.fma ) {
10081018 simd::mul_const_add = &avx::fma::mul_const_add;
10091019 simd::mul_reduce = &avx::fma::mul_reduce;
@@ -1012,7 +1022,9 @@ static const bool simd_map = []() -> bool {
10121022 simd::complex_mul_conj = &avx::fma::complex_mul_conj;
10131023 simd::fft_step = &avx::fma::fft_step;
10141024 }
1025+ #endif // SIMD_FMA
10151026 }
1027+ #endif // SIMD_AVX
10161028
10171029 return true ;
10181030}();
0 commit comments