Skip to content

Commit f4a58d3

Browse files
WIP
1 parent 79762fa commit f4a58d3

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -790,37 +790,42 @@ namespace xsimd
790790
{
791791
return select(batch_bool<T, A> { Values... }, true_br, false_br, altivec {});
792792
}
793-
#if 0
794793

795794
// shuffle
796795
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
797796
XSIMD_INLINE batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3> mask, requires_arch<altivec>) noexcept
798797
{
799-
constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3);
800-
// shuffle within lane
801-
if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
802-
return _mm_shuffle_ps(x, y, smask);
803-
804-
// shuffle within opposite lane
805-
if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
806-
return _mm_shuffle_ps(y, x, smask);
807-
return shuffle(x, y, mask, common {});
798+
return vec_perm(x, y,
799+
(__vector unsigned char) {
800+
4 * I0 + 0, 4 * I0 + 1, 4 * I0 + 2, 4 * I0 + 3,
801+
4 * I1 + 0, 4 * I1 + 1, 4 * I1 + 2, 4 * I1 + 3,
802+
4 * I2 + 0, 4 * I2 + 1, 4 * I2 + 2, 4 * I2 + 3,
803+
4 * I3 + 0, 4 * I3 + 1, 4 * I3 + 2, 4 * I3 + 3 });
808804
}
809805

810806
template <class A, class ITy, ITy I0, ITy I1>
811807
XSIMD_INLINE batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<ITy, A, I0, I1> mask, requires_arch<altivec>) noexcept
812808
{
813-
constexpr uint32_t smask = detail::mod_shuffle(I0, I1);
814-
// shuffle within lane
815-
if (I0 < 2 && I1 >= 2)
816-
return _mm_shuffle_pd(x, y, smask);
817-
818-
// shuffle within opposite lane
819-
if (I0 >= 2 && I1 < 2)
820-
return _mm_shuffle_pd(y, x, smask);
821-
return shuffle(x, y, mask, common {});
809+
return vec_perm(x, y,
810+
(__vector unsigned char) {
811+
8 * I0 + 0,
812+
8 * I0 + 1,
813+
8 * I0 + 2,
814+
8 * I0 + 3,
815+
8 * I0 + 4,
816+
8 * I0 + 5,
817+
8 * I0 + 6,
818+
8 * I0 + 7,
819+
8 * I1 + 0,
820+
8 * I1 + 1,
821+
8 * I1 + 2,
822+
8 * I1 + 3,
823+
8 * I1 + 4,
824+
8 * I1 + 5,
825+
8 * I1 + 6,
826+
8 * I1 + 7,
827+
});
822828
}
823-
#endif
824829

825830
// sqrt
826831
template <class A>

0 commit comments

Comments
 (0)