Commit 16be737

WIP
1 parent: 79762fa


include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 34 additions & 43 deletions
@@ -790,121 +790,112 @@ namespace xsimd
         {
             return select(batch_bool<T, A> { Values... }, true_br, false_br, altivec {});
         }
-#if 0
 
         // shuffle
         template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
         XSIMD_INLINE batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3> mask, requires_arch<altivec>) noexcept
         {
-            constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3);
-            // shuffle within lane
-            if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
-                return _mm_shuffle_ps(x, y, smask);
-
-            // shuffle within opposite lane
-            if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
-                return _mm_shuffle_ps(y, x, smask);
-            return shuffle(x, y, mask, common {});
+            return vec_perm(x, y,
+                            (__vector unsigned char) {
+                                4 * I0 + 0, 4 * I0 + 1, 4 * I0 + 2, 4 * I0 + 3,
+                                4 * I1 + 0, 4 * I1 + 1, 4 * I1 + 2, 4 * I1 + 3,
+                                4 * I2 + 0, 4 * I2 + 1, 4 * I2 + 2, 4 * I2 + 3,
+                                4 * I3 + 0, 4 * I3 + 1, 4 * I3 + 2, 4 * I3 + 3 });
         }
 
         template <class A, class ITy, ITy I0, ITy I1>
         XSIMD_INLINE batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<ITy, A, I0, I1> mask, requires_arch<altivec>) noexcept
         {
-            constexpr uint32_t smask = detail::mod_shuffle(I0, I1);
-            // shuffle within lane
-            if (I0 < 2 && I1 >= 2)
-                return _mm_shuffle_pd(x, y, smask);
-
-            // shuffle within opposite lane
-            if (I0 >= 2 && I1 < 2)
-                return _mm_shuffle_pd(y, x, smask);
-            return shuffle(x, y, mask, common {});
+            return vec_perm(x, y,
+                            (__vector unsigned char) {
+                                8 * I0 + 0, 8 * I0 + 1, 8 * I0 + 2, 8 * I0 + 3, 8 * I0 + 4, 8 * I0 + 5, 8 * I0 + 6, 8 * I0 + 7,
+                                8 * I1 + 0, 8 * I1 + 1, 8 * I1 + 2, 8 * I1 + 3, 8 * I1 + 4, 8 * I1 + 5, 8 * I1 + 6, 8 * I1 + 7,
+                            });
         }
-#endif
 
         // sqrt
         template <class A>
         XSIMD_INLINE batch<float, A> sqrt(batch<float, A> const& val, requires_arch<altivec>) noexcept
         {
-            return vec_sqrt(val.data);
+            return vec_sqrt(val.data);
         }
 
         template <class A>
         XSIMD_INLINE batch<double, A> sqrt(batch<double, A> const& val, requires_arch<altivec>) noexcept
         {
-            return vec_sqrt(val.data);
+            return vec_sqrt(val.data);
         }
 
         // slide_left
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
         }
 
        // slide_right
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
         }
 
         // sadd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && sizeof(T) != 8, void>::type>
         XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_adds(self.data, other.data);
+            return vec_adds(self.data, other.data);
         }
 
         // set
         template <class A, class T, class... Values>
         XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<altivec>, Values... values) noexcept
         {
-            static_assert(sizeof...(Values) == batch<T, A>::size, "consistent init");
-            return typename batch<T, A>::register_type { values... };
+            static_assert(sizeof...(Values) == batch<T, A>::size, "consistent init");
+            return typename batch<T, A>::register_type { values... };
         }
 
         template <class A, class T, class... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<altivec>, Values... values) noexcept
         {
-            static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
-            return typename batch_bool<T, A>::register_type { static_cast<decltype(std::declval<typename batch_bool<T, A>::register_type>()[0])>(values ? -1LL : 0LL)... };
+            static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
+            return typename batch_bool<T, A>::register_type { static_cast<decltype(std::declval<typename batch_bool<T, A>::register_type>()[0])>(values ? -1LL : 0LL)... };
         }
 
         // ssub
 
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value && sizeof(T) == 1, void>::type>
         XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_subs(self.data, other.data);
+            return vec_subs(self.data, other.data);
         }
 
         // store_aligned
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
+            return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
         }
 
         // store_unaligned
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
-            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
-            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
-            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
-            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
+            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
+            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
+            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
+            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
+            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
         }
 
         // sub
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> sub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_sub(self.data, other.data);
+            return vec_sub(self.data, other.data);
         }
 
 #if 0
@@ -1024,16 +1015,16 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergeh(self.data, other.data);
+            return vec_mergeh(self.data, other.data);
         }
 
         // zip_lo
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergel(self.data, other.data);
+            return vec_mergel(self.data, other.data);
+        }
     }
 }
-}
 
 #endif
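
Note on the new shuffle implementations: vec_perm selects bytes out of the 32-byte concatenation of its two operands using the byte indices in its third argument, so a 32-bit lane index I expands to bytes 4 * I .. 4 * I + 3 (a 64-bit lane index to 8 * I .. 8 * I + 7), with the low lane indices picking from x and the high ones from y. The snippet below is a minimal, portable model of that byte selection for sanity-checking the index arithmetic; it is plain C++ rather than AltiVec code, perm_model and the sample lane indices (1, 2, 4, 7) are invented for illustration, and it deliberately ignores the big-endian byte numbering vec_perm assumes, which only matters for sub-element ordering, not for the whole-lane selection modelled here.

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

// Model of vec_perm's byte selection: concatenate x and y into a 32-byte
// pool and pick the bytes named by idx (indices are taken modulo 32).
std::array<std::uint8_t, 16> perm_model(std::array<float, 4> const& x,
                                        std::array<float, 4> const& y,
                                        std::array<std::uint8_t, 16> const& idx)
{
    std::uint8_t pool[32];
    std::memcpy(pool, x.data(), 16);
    std::memcpy(pool + 16, y.data(), 16);
    std::array<std::uint8_t, 16> out {};
    for (int i = 0; i < 16; ++i)
        out[i] = pool[idx[i] & 31];
    return out;
}

int main()
{
    // Lane indices I0..I3: lanes 0-3 select from x, lanes 4-7 from y.
    constexpr std::uint8_t I0 = 1, I1 = 2, I2 = 4, I3 = 7;
    std::array<std::uint8_t, 16> idx = {
        4 * I0 + 0, 4 * I0 + 1, 4 * I0 + 2, 4 * I0 + 3,
        4 * I1 + 0, 4 * I1 + 1, 4 * I1 + 2, 4 * I1 + 3,
        4 * I2 + 0, 4 * I2 + 1, 4 * I2 + 2, 4 * I2 + 3,
        4 * I3 + 0, 4 * I3 + 1, 4 * I3 + 2, 4 * I3 + 3
    };
    std::array<float, 4> x = { 0.f, 1.f, 2.f, 3.f };
    std::array<float, 4> y = { 4.f, 5.f, 6.f, 7.f };

    std::array<float, 4> r;
    std::memcpy(r.data(), perm_model(x, y, idx).data(), 16);
    for (float v : r)
        std::cout << v << ' '; // prints: 1 2 4 7
    std::cout << '\n';
}

Built with any C++11 compiler, this prints 1 2 4 7, i.e. lanes x[1], x[2], y[0], y[3], which is what the generated permute vector should select.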
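
Note on store_unaligned (only re-indented by this commit): this looks like the classic AltiVec misaligned-store idiom, where vec_lvsr(0, mem) builds a permute mask that rotates the register right by the pointer's misalignment and each vec_ste then stores one naturally aligned element, the intent being that the element stores jointly touch all 16 destination bytes for any alignment. The sketch below is a host-only coverage check under my reading of vec_ste (effective address = pointer + offset, rounded down to the element's natural alignment); the stores table simply mirrors the offsets and element sizes in the diff, and the names stores/covered/mis are illustrative. Descriptions of this idiom in AltiVec programming material usually end with one extra single-byte store at offset 15, and this check does report byte 15 as unwritten for odd misalignments with only the seven stores shown, so that may be worth revisiting while the commit is still WIP.

#include <cstdio>

int main()
{
    // (offset, element size) pairs mirroring the vec_ste calls in the diff.
    const int stores[][2] = { { 0, 1 }, { 1, 2 }, { 3, 4 }, { 4, 4 },
                              { 8, 4 }, { 12, 4 }, { 14, 2 } };

    for (int mis = 0; mis < 16; ++mis) // pointer misalignment: mem % 16
    {
        bool covered[16] = {};
        for (auto const& s : stores)
        {
            // vec_ste rounds (mem + offset) down to the element's alignment
            // and writes one element of that size there.
            int ea = ((mis + s[0]) / s[1]) * s[1];
            for (int b = 0; b < s[1]; ++b)
            {
                int rel = ea + b - mis; // destination byte relative to mem
                if (rel >= 0 && rel < 16)
                    covered[rel] = true;
            }
        }
        std::printf("mis=%2d uncovered:", mis);
        for (int b = 0; b < 16; ++b)
            if (!covered[b])
                std::printf(" %d", b);
        std::printf("\n");
    }
}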
