@@ -647,53 +647,26 @@ namespace xsimd
647647 return vec_re (self);
648648 }
649649
650- #if 0
651650 // reduce_add
652- template <class A>
653- XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<altivec>) noexcept
654- {
655- __m128 tmp0 = _mm_add_ps(self, _mm_movehl_ps(self, self));
656- __m128 tmp1 = _mm_add_ss(tmp0, _mm_shuffle_ps(tmp0, tmp0, 1));
657- return _mm_cvtss_f32(tmp1);
658- }
659-
660- template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
651+ template <class A , class T , class = typename std::enable_if<std::is_scalar<T>::value, void >::type>
661652 XSIMD_INLINE T reduce_add (batch<T, A> const & self, requires_arch<altivec>) noexcept
662653 {
663654 XSIMD_IF_CONSTEXPR (sizeof (T) == 4 )
664655 {
665- __m128i tmp1 = _mm_shuffle_epi32(self, 0x0E);
666- __m128i tmp2 = _mm_add_epi32(self, tmp1);
667- __m128i tmp3 = _mm_shuffle_epi32(tmp2, 0x01);
668- __m128i tmp4 = _mm_add_epi32(tmp2, tmp3);
669- return _mm_cvtsi128_si32(tmp4);
670- }
671- else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
672- {
673- __m128i tmp1 = _mm_shuffle_epi32(self, 0x0E);
674- __m128i tmp2 = _mm_add_epi64(self, tmp1);
675- #if defined(__x86_64__)
676- return _mm_cvtsi128_si64(tmp2);
677- #else
678- __m128i m;
679- _mm_storel_epi64(&m, tmp2);
680- int64_t i;
681- std::memcpy(&i, &m, sizeof(i));
682- return i;
683- #endif
656+ // FIXME: find an in-order approach; the steps below add the lanes pairwise as (v0 + v3) + (v2 + v1), not left to right
657+ auto tmp0 = vec_reve (self); // v3, v2, v1, v0
658+ auto tmp1 = vec_add (self.data , tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
659+ auto tmp2 = vec_permi (tmp1, tmp1, 0x3 ); // v2 + v1, v3 + v0, v2 + v1, v3 + v0
660+ auto tmp3 = vec_add (tmp1, tmp2); // (v0 + v3) + (v2 + v1), (v1 + v2) + (v3 + v0), ...
661+ return vec_extract (tmp3, 0 ); // lane 0 carries the sum of all four elements
684662 }
685663 else
686664 {
687665 return hadd (self, common {}); // element sizes other than 4 bytes are handed to hadd with the common tag
688666 }
689667 }
690668
691- template <class A>
692- XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<altivec>) noexcept
693- {
694- return _mm_cvtsd_f64(_mm_add_sd(self, _mm_unpackhi_pd(self, self)));
695- }
696-
669+ #if 0
697670 // reduce_max
698671 template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
699672 XSIMD_INLINE T reduce_max(batch<T, A> const& self, requires_arch<altivec>) noexcept
0 commit comments