@@ -1046,25 +1046,7 @@ namespace xsimd
10461046 }
10471047
10481048 // reduce_add
1049- template <class A > // Horizontal sum: adds the eight float elements of rhs and returns the scalar total.
1050- XSIMD_INLINE float reduce_add (batch<float , A> const & rhs, requires_arch<avx>) noexcept
1051- {
1052- // Warning about _mm256_hadd_ps: it only adds adjacent pairs inside each 128-bit half.
1053- // _mm256_hadd_ps(a,b) gives
1054- // (a0+a1,a2+a3,b0+b1,b2+b3,a4+a5,a6+a7,b4+b5,b6+b7). Hence we can't
1055- // rely on a naive use of this intrinsic; the two 128-bit halves are added together first.
1056- // rhs = (x0, x1, x2, x3, x4, x5, x6, x7)
1057- // tmp = (x4, x5, x6, x7, x0, x1, x2, x3), i.e. rhs with its two 128-bit halves swapped
1058- __m256 tmp = _mm256_permute2f128_ps (rhs, rhs, 1 );
1059- // tmp = (x4+x0, x5+x1, x6+x2, x7+x3, x0+x4, x1+x5, x2+x6, x3+x7): each element is a cross-half pair sum
1060- tmp = _mm256_add_ps (rhs, tmp);
1061- // tmp = (x4+x0+x5+x1, x6+x2+x7+x3, -, -, -, -, -, -) where "-" marks elements that are not needed
1062- tmp = _mm256_hadd_ps (tmp, tmp);
1063- // tmp = (x4+x0+x5+x1+x6+x2+x7+x3, -, -, -, -, -, -, -): element 0 now holds the sum of all eight inputs
1064- tmp = _mm256_hadd_ps (tmp, tmp);
1065- return _mm_cvtss_f32 (_mm256_extractf128_ps (tmp, 0 )); // read element 0 of the low 128-bit half
1066- }
1067- template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value || std::is_same<T, double >::value, void >::type>
1049+ template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value || std::is_same<T, float >::value || std::is_same<T, double >::value, void >::type>
10681050 XSIMD_INLINE T reduce_add (batch<T, A> const & self, requires_arch<avx>) noexcept
10691051 {
10701052 typename batch<T, sse4_2>::register_type low, high;
0 commit comments