@@ -790,121 +790,112 @@ namespace xsimd
         {
             return select(batch_bool<T, A> { Values... }, true_br, false_br, altivec {});
         }
-#if 0

         // shuffle
         template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
         XSIMD_INLINE batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3> mask, requires_arch<altivec>) noexcept
         {
-            constexpr uint32_t smask = detail::mod_shuffle(I0, I1, I2, I3);
-            // shuffle within lane
-            if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
-                return _mm_shuffle_ps(x, y, smask);
-
-            // shuffle within opposite lane
-            if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
-                return _mm_shuffle_ps(y, x, smask);
-            return shuffle(x, y, mask, common {});
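+            // vec_perm selects bytes from the 32-byte concatenation x:y, so
+            // each index Ik (in [0, 8)) expands to the four byte positions
+            // 4*Ik .. 4*Ik+3 of its 32-bit lane.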
+            return vec_perm(x, y,
+                            (__vector unsigned char) {
+                                4 * I0 + 0, 4 * I0 + 1, 4 * I0 + 2, 4 * I0 + 3,
+                                4 * I1 + 0, 4 * I1 + 1, 4 * I1 + 2, 4 * I1 + 3,
+                                4 * I2 + 0, 4 * I2 + 1, 4 * I2 + 2, 4 * I2 + 3,
+                                4 * I3 + 0, 4 * I3 + 1, 4 * I3 + 2, 4 * I3 + 3 });
         }

         template <class A, class ITy, ITy I0, ITy I1>
         XSIMD_INLINE batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<ITy, A, I0, I1> mask, requires_arch<altivec>) noexcept
         {
-            constexpr uint32_t smask = detail::mod_shuffle(I0, I1);
-            // shuffle within lane
-            if (I0 < 2 && I1 >= 2)
-                return _mm_shuffle_pd(x, y, smask);
-
-            // shuffle within opposite lane
-            if (I0 >= 2 && I1 < 2)
-                return _mm_shuffle_pd(y, x, smask);
-            return shuffle(x, y, mask, common {});
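+            // Same construction for 64-bit lanes: I0 and I1 (in [0, 4)) each
+            // pick an 8-byte element from x (0-1) or y (2-3).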
+            return vec_perm(x, y,
+                            (__vector unsigned char) {
+                                8 * I0 + 0, 8 * I0 + 1, 8 * I0 + 2, 8 * I0 + 3, 8 * I0 + 4, 8 * I0 + 5, 8 * I0 + 6, 8 * I0 + 7,
+                                8 * I1 + 0, 8 * I1 + 1, 8 * I1 + 2, 8 * I1 + 3, 8 * I1 + 4, 8 * I1 + 5, 8 * I1 + 6, 8 * I1 + 7 });
         }
-#endif

         // sqrt
         template <class A>
         XSIMD_INLINE batch<float, A> sqrt(batch<float, A> const& val, requires_arch<altivec>) noexcept
         {
-            return vec_sqrt(val.data);
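+            // vec_sqrt maps to the VSX square-root instructions
+            // (xvsqrtsp here, xvsqrtdp for the double overload below).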
+            return vec_sqrt(val.data);
         }

         template <class A>
         XSIMD_INLINE batch<double, A> sqrt(batch<double, A> const& val, requires_arch<altivec>) noexcept
         {
-            return vec_sqrt(val.data);
+            return vec_sqrt(val.data);
         }

         // slide_left
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
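+            // Treat the register as 16 raw bytes and shift the whole 128-bit
+            // value left with vec_sll, which reads its shift amount (in bits)
+            // from the splatted shift vector.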
+            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
         }

         // slide_right
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
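+            // Mirror of slide_left, using the right-shift form vec_srl.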
+            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
         }

         // sadd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && sizeof(T) != 8, void>::type>
         XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_adds(self.data, other.data);
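+            // vec_adds is a saturating add; AltiVec only defines it for 8-,
+            // 16- and 32-bit integer elements, hence the sizeof(T) != 8 guard.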
+            return vec_adds(self.data, other.data);
         }

         // set
         template <class A, class T, class... Values>
         XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<altivec>, Values... values) noexcept
         {
-            static_assert(sizeof...(Values) == batch<T, A>::size, "consistent init");
-            return typename batch<T, A>::register_type { values... };
+            static_assert(sizeof...(Values) == batch<T, A>::size, "consistent init");
+            return typename batch<T, A>::register_type { values... };
         }

         template <class A, class T, class... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<altivec>, Values... values) noexcept
         {
-            static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
-            return typename batch_bool<T, A>::register_type { static_cast<decltype(std::declval<typename batch_bool<T, A>::register_type>()[0])>(values ? -1LL : 0LL)... };
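+            // Widen each bool to an all-ones / all-zeros lane, matching the
+            // mask representation produced by the vec_cmp* intrinsics.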
+            static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
+            return typename batch_bool<T, A>::register_type { static_cast<decltype(std::declval<typename batch_bool<T, A>::register_type>()[0])>(values ? -1LL : 0LL)... };
         }

         // ssub

         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value && sizeof(T) == 1, void>::type>
         XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_subs(self.data, other.data);
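+            // vec_subs is the saturating counterpart of vec_sub; this overload
+            // is restricted to 8-bit elements by the enable_if guard.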
+            return vec_subs(self.data, other.data);
         }

         // store_aligned
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
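+            // vec_st writes all 16 bytes; the target must be 16-byte aligned
+            // (the hardware ignores the low four bits of the address).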
+            return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
         }

         // store_unaligned
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
-            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
-            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
-            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
-            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
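+            // Classic AltiVec unaligned-store idiom: vec_lvsr turns the
+            // address misalignment into a rotate permute, vec_perm rotates the
+            // data into place, then vec_ste scatters the head, body words and
+            // tail so no byte outside [mem, mem + 16) is touched.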
+            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
+            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
+            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
+            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
+            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
+            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
+            vec_ste((__vector unsigned char)tmp, 15, (unsigned char*)mem); // tail byte, completing the 16-byte sequence
901892 }
902893
903894 // sub
904895 template <class A , class T , class = typename std::enable_if<std::is_scalar<T>::value, void >::type>
905896 XSIMD_INLINE batch<T, A> sub (batch<T, A> const & self, batch<T, A> const & other, requires_arch<altivec>) noexcept
906897 {
-            return vec_sub(self.data, other.data);
+            return vec_sub(self.data, other.data);
         }

 #if 0
@@ -1024,16 +1015,16 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergeh(self.data, other.data);
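+            // vec_mergeh interleaves elements drawn from the first halves of
+            // the two registers (element numbering follows the target
+            // endianness convention).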
+            return vec_mergeh(self.data, other.data);
         }

         // zip_lo
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergel(self.data, other.data);
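+            // vec_mergel is the complementary merge of the second halves.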
+            return vec_mergel(self.data, other.data);
+        }
     }
 }
-    }

 #endif