@@ -1506,22 +1506,12 @@ namespace xsimd
// reduce_add overload for batch<float, A>, selected through the requires_arch<avx512f> tag.
// Returns a single float computed from rhs.
// This block is shown as a diff hunk: lines carrying a line number followed by '-' are the
// removed implementation, and the line followed by '+' is its replacement.
15061506 template <class A >
15071507 XSIMD_INLINE float reduce_add (batch<float , A> const & rhs, requires_arch<avx512f>) noexcept
15081508 {
// Removed version: pulled four __m128 pieces out of rhs (indices 0..3), ...
1509- __m128 tmp1 = _mm512_extractf32x4_ps (rhs, 0 );
1510- __m128 tmp2 = _mm512_extractf32x4_ps (rhs, 1 );
1511- __m128 tmp3 = _mm512_extractf32x4_ps (rhs, 2 );
1512- __m128 tmp4 = _mm512_extractf32x4_ps (rhs, 3 );
// ... added them pairwise with _mm_add_ps down to one __m128, ...
1513- __m128 res1 = _mm_add_ps (tmp1, tmp2);
1514- __m128 res2 = _mm_add_ps (tmp3, tmp4);
1515- __m128 res3 = _mm_add_ps (res1, res2);
// ... and delegated the final sum to the sse4_2 reduce_add overload.
1516- return reduce_add (batch<float , sse4_2>(res3), sse4_2 {});
// Replacement: hand the whole reduction to the _mm512_reduce_add_ps intrinsic.
// NOTE(review): the order in which elements are summed may differ from the removed code,
// so results could differ by floating-point rounding - confirm tests tolerate this.
// NOTE(review): confirm every supported compiler provides _mm512_reduce_add_ps.
1509+ return _mm512_reduce_add_ps (rhs);
15171510 }
// reduce_add overload for batch<double, A>, selected through the requires_arch<avx512f> tag.
// Returns a single double computed from rhs.
// This block is shown as a diff hunk: lines carrying a line number followed by '-' are the
// removed implementation, and the line followed by '+' is its replacement.
15181511 template <class A >
15191512 XSIMD_INLINE double reduce_add (batch<double , A> const & rhs, requires_arch<avx512f>) noexcept
15201513 {
// Removed version: extracted two __m256d pieces from rhs (index 1, then index 0),
// added them with _mm256_add_pd, and delegated the final sum to the avx2 reduce_add overload.
1521- __m256d tmp1 = _mm512_extractf64x4_pd (rhs, 1 );
1522- __m256d tmp2 = _mm512_extractf64x4_pd (rhs, 0 );
1523- __m256d res1 = _mm256_add_pd (tmp1, tmp2);
1524- return reduce_add (batch<double , avx2>(res1), avx2 {});
// Replacement: hand the whole reduction to the _mm512_reduce_add_pd intrinsic.
// NOTE(review): the order in which elements are summed may differ from the removed code,
// so results could differ by floating-point rounding - confirm tests tolerate this.
// NOTE(review): confirm every supported compiler provides _mm512_reduce_add_pd.
1514+ return _mm512_reduce_add_pd (rhs);
15251515 }
15261516 template <class A , class T , class = typename std::enable_if<std::is_integral<T>::value, void >::type>
15271517 XSIMD_INLINE T reduce_add (batch<T, A> const & self, requires_arch<avx512f>) noexcept
0 commit comments