Skip to content

Commit bb5dd63

Browse files
Use _mm512_reduce_add_ps and _mm512_reduce_add_pd instead of custom sequences
1 parent ee19a39 commit bb5dd63

File tree

1 file changed

+2
-12
lines changed

1 file changed

+2
-12
lines changed

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1506,22 +1506,12 @@ namespace xsimd
15061506
template <class A>
15071507
XSIMD_INLINE float reduce_add(batch<float, A> const& rhs, requires_arch<avx512f>) noexcept
15081508
{
1509-
__m128 tmp1 = _mm512_extractf32x4_ps(rhs, 0);
1510-
__m128 tmp2 = _mm512_extractf32x4_ps(rhs, 1);
1511-
__m128 tmp3 = _mm512_extractf32x4_ps(rhs, 2);
1512-
__m128 tmp4 = _mm512_extractf32x4_ps(rhs, 3);
1513-
__m128 res1 = _mm_add_ps(tmp1, tmp2);
1514-
__m128 res2 = _mm_add_ps(tmp3, tmp4);
1515-
__m128 res3 = _mm_add_ps(res1, res2);
1516-
return reduce_add(batch<float, sse4_2>(res3), sse4_2 {});
1509+
return _mm512_reduce_add_ps(rhs);
15171510
}
15181511
template <class A>
15191512
XSIMD_INLINE double reduce_add(batch<double, A> const& rhs, requires_arch<avx512f>) noexcept
15201513
{
1521-
__m256d tmp1 = _mm512_extractf64x4_pd(rhs, 1);
1522-
__m256d tmp2 = _mm512_extractf64x4_pd(rhs, 0);
1523-
__m256d res1 = _mm256_add_pd(tmp1, tmp2);
1524-
return reduce_add(batch<double, avx2>(res1), avx2 {});
1514+
return _mm512_reduce_add_pd(rhs);
15251515
}
15261516
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
15271517
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<avx512f>) noexcept

0 commit comments

Comments
 (0)