Skip to content

Commit b5ab4f1

Browse files
WIP
1 parent 68ca9a9 commit b5ab4f1

File tree

1 file changed

+8
-35
lines changed

1 file changed

+8
-35
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -647,53 +647,26 @@ namespace xsimd
647647
return vec_re(self);
648648
}
649649

650-
#if 0
651650
// reduce_add
652-
template <class A>
653-
XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<altivec>) noexcept
654-
{
655-
__m128 tmp0 = _mm_add_ps(self, _mm_movehl_ps(self, self));
656-
__m128 tmp1 = _mm_add_ss(tmp0, _mm_shuffle_ps(tmp0, tmp0, 1));
657-
return _mm_cvtss_f32(tmp1);
658-
}
659-
660-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
651+
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
661652
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
662653
{
663654
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
664655
{
665-
__m128i tmp1 = _mm_shuffle_epi32(self, 0x0E);
666-
__m128i tmp2 = _mm_add_epi32(self, tmp1);
667-
__m128i tmp3 = _mm_shuffle_epi32(tmp2, 0x01);
668-
__m128i tmp4 = _mm_add_epi32(tmp2, tmp3);
669-
return _mm_cvtsi128_si32(tmp4);
670-
}
671-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
672-
{
673-
__m128i tmp1 = _mm_shuffle_epi32(self, 0x0E);
674-
__m128i tmp2 = _mm_add_epi64(self, tmp1);
675-
#if defined(__x86_64__)
676-
return _mm_cvtsi128_si64(tmp2);
677-
#else
678-
__m128i m;
679-
_mm_storel_epi64(&m, tmp2);
680-
int64_t i;
681-
std::memcpy(&i, &m, sizeof(i));
682-
return i;
683-
#endif
656+
// FIXME: find an in-order approach
657+
auto tmp0 = vec_reve(self); // v3, v2, v1, v0
658+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
659+
auto tmp2 = vec_permi(tmp1, tmp1, 0x3); // v2 + v1, v3 + v0, v2 + v1, v3 + v0
660+
auto tmp3 = vec_add(tmp1, tmp2);
661+
return vec_extract(tmp3, 0);
684662
}
685663
else
686664
{
687665
return hadd(self, common {});
688666
}
689667
}
690668

691-
template <class A>
692-
XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<altivec>) noexcept
693-
{
694-
return _mm_cvtsd_f64(_mm_add_sd(self, _mm_unpackhi_pd(self, self)));
695-
}
696-
669+
#if 0
697670
// reduce_max
698671
template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
699672
XSIMD_INLINE T reduce_max(batch<T, A> const& self, requires_arch<altivec>) noexcept

0 commit comments

Comments
 (0)