@@ -122,9 +122,16 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            constexpr auto nbit = 8 * sizeof(T) - 1;
-            auto adj = ((self ^ other) << nbit) >> nbit;
-            return avgr(self, other, A {}) - adj;
+            XSIMD_IF_CONSTEXPR (sizeof(T) < 8)
+            {
+                constexpr auto nbit = 8 * sizeof(T) - 1;
+                auto adj = bitwise_cast<T>(bitwise_cast<as_unsigned_integer_t<T>>((self ^ other) << nbit) >> nbit);
+                return avgr(self, other, A {}) - adj;
+            }
+            else
+            {
+                return avg(self, other, common {});
+            }
         }
         template <class A>
         XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
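Note on the integer avg fix above: avgr rounds up while xsimd's avg truncates, and the two differ by exactly the low bit of self ^ other. That bit must be extracted with a logical right shift; routing the shift through the unsigned type forces that, whereas the old arithmetic shift on signed T produced 0 or -1 and adjusted in the wrong direction. The 64-bit case falls back to the common kernel because vec_avg only exists for 8/16/32-bit lanes. A minimal scalar sketch of the identity, using int8_t and hypothetical helper names (not xsimd API):

    int8_t avgr8(int8_t a, int8_t b) { return (int16_t(a) + b + 1) >> 1; } // rounding average, as vec_avg
    int8_t avg8(int8_t a, int8_t b) { return avgr8(a, b) - ((a ^ b) & 1); } // truncating average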
@@ -207,7 +214,14 @@ namespace xsimd
         {
             using shift_type = as_unsigned_integer_t<T>;
             batch<shift_type, A> shift(static_cast<shift_type>(other));
-            return vec_sr(self.data, shift.data);
+            XSIMD_IF_CONSTEXPR (std::is_signed<T>::value)
+            {
+                return vec_sra(self.data, shift.data);
+            }
+            else
+            {
+                return vec_sr(self.data, shift.data);
+            }
         }
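This fixes bitwise_rshift for signed lanes: vec_sr always shifts in zeros, while vec_sra replicates the sign bit. The scalar analogue of the difference:

    int32_t a = -8;
    int32_t arith = a >> 1;             // -4, sign-extended: what vec_sra does per lane
    uint32_t logic = uint32_t(a) >> 1;  // 0x7FFFFFFC, zero-filled: what vec_sr does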
 
         // bitwise_xor
@@ -226,7 +240,7 @@ namespace xsimd
         template <class A, class T_in, class T_out>
         XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<altivec>) noexcept
         {
-            return *reinterpret_cast<typename batch<T_out, A>::register_type const*>(&self.data);
+            return (typename batch<T_out, A>::register_type)(self.data);
         }
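The AltiVec language extension defines C-style casts between vector types as pure bit reinterpretation, so the cast form is both simpler and free of the strict-aliasing hazard of the old pointer reinterpret_cast. For example:

    __vector float f = { 1.0f, 2.0f, 3.0f, 4.0f };
    __vector unsigned int u = (__vector unsigned int)f; // same 128 bits, no value conversion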
 
         // broadcast
@@ -243,43 +257,43 @@ namespace xsimd
             template <class A>
             XSIMD_INLINE batch<float, A> complex_low(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergel(self.real().data, self.imag().data);
+                return vec_mergeh(self.real().data, self.imag().data);
             }
             template <class A>
             XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergel(self.real().data, self.imag().data);
+                return vec_mergeh(self.real().data, self.imag().data);
             }
             // complex_high
             template <class A>
             XSIMD_INLINE batch<float, A> complex_high(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergeh(self.real().data, self.imag().data);
+                return vec_mergel(self.real().data, self.imag().data);
             }
             template <class A>
             XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergeh(self.real().data, self.imag().data);
+                return vec_mergel(self.real().data, self.imag().data);
             }
         }
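complex_low must return the first (lowest-address) half of the interleaved (real, imag) sequence, and on big-endian AltiVec that is what vec_mergeh produces; the old code had the two merges swapped. With float lanes:

    // real = (r0, r1, r2, r3), imag = (i0, i1, i2, i3)
    // vec_mergeh(real, imag) -> (r0, i0, r1, i1)   first two complex values: complex_low
    // vec_mergel(real, imag) -> (r2, i2, r3, i3)   last two complex values: complex_high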
 
         // decr_if
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
         {
-            return self + batch<T, A>(mask.data);
+            return self + batch<T, A>((typename batch<T, A>::register_type)mask.data);
         }
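The added cast reinterprets the __vector bool mask as the batch's integer register type (presumably the boolean register does not convert implicitly). Either way, the underlying trick is unchanged: a true lane is all ones, i.e. -1 as a signed integer, so adding the mask decrements exactly the selected lanes, and incr_if below subtracts it to increment them. Scalar model:

    int32_t lane = 10;
    int32_t true_mask = -1; // 0xFFFFFFFF, a "true" lane
    lane += true_mask;      // 9: decremented where the predicate holds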
 
         // div
         template <class A>
         XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mul(self.data, vec_re(other.data));
+            return vec_div(self.data, other.data);
         }
         template <class A>
         XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mul(self.data, vec_re(other.data));
+            return vec_div(self.data, other.data);
         }
 
         // fast_cast
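vec_re is only a reciprocal estimate (the AltiVec spec guarantees roughly 12 bits of precision), so multiplying by it is not IEEE division; vec_div computes the properly rounded quotient. On targets where only the estimate exists, the usual repair is a Newton-Raphson step. A sketch for illustration, assuming __vector float a and b (not what the patch does):

    __vector float re = vec_re(b);                              // ~12-bit reciprocal estimate
    re = vec_madd(re, vec_nmsub(re, b, vec_splats(1.0f)), re);  // one NR step: re *= (2 - re * b)
    __vector float q = vec_madd(a, re, vec_splats(0.0f));       // a / b to ~24 bits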
@@ -471,7 +485,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
         {
-            return self - batch<T, A>(mask.data);
+            return self - batch<T, A>((typename batch<T, A>::register_type)mask.data);
         }
 
         // insert
@@ -504,9 +518,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
         {
-            auto lo = vec_ld(0, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
-            auto hi = vec_ld(16, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
-            return vec_perm(lo, hi, vec_lvsl(0, mem));
+            return *(typename batch<T, A>::register_type const*)mem;
        }
 
         // load_complex
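The removed lvx/lvsl/vperm sequence is the classic pre-VSX misaligned-load idiom; it can also read up to 16 bytes beyond the loaded element block. The plain dereference (and the matching plain store in store_unaligned below) relies on the compiler lowering the vector access to unaligned-capable instructions, which holds on VSX targets. A strictly portable sketch of the same intent, assuming <cstring> is included:

    typename batch<T, A>::register_type r;
    std::memcpy(&r, mem, sizeof(r)); // compilers lower this to a single unaligned vector load
    return r;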
@@ -515,7 +527,9 @@ namespace xsimd
         template <class A>
         XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<altivec>) noexcept
         {
-            return { vec_mergee(hi.data, lo.data), vec_mergeo(hi.data, lo.data) };
+            __vector unsigned char perme = { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
+            __vector unsigned char permo = { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
+            return { vec_perm(hi.data, lo.data, perme), vec_perm(hi.data, lo.data, permo) };
         }
         template <class A>
         XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<altivec>) noexcept
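In a vec_perm control vector, byte indices 0-15 select from the first operand and 16-31 from the second; perme therefore gathers the even 32-bit lanes (the real parts) and permo the odd ones (the imaginary parts), replacing vec_mergee/vec_mergeo, which are Power8-era intrinsics unavailable on baseline AltiVec. Concretely, for interleaved input:

    // hi = (r0, i0, r1, i1), lo = (r2, i2, r3, i3)
    // vec_perm(hi, lo, perme) -> (r0, r1, r2, r3)   real parts
    // vec_perm(hi, lo, permo) -> (i0, i1, i2, i3)   imaginary parts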
@@ -685,7 +699,7 @@ namespace xsimd
         {
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
@@ -694,7 +708,7 @@ namespace xsimd
         {
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
@@ -704,7 +718,7 @@ namespace xsimd
             // FIXME: find an in-order approach
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
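The inline comments always described the vec_mergel result; the code now matches them. vec_mergel duplicates lanes 2 and 3 of tmp1, which hold the two cross sums, so lane 0 of tmp1 + tmp2 is the full reduction; vec_mergeh would duplicate lanes 0 and 1 and lane 0 would come out as 2 * (v0 + v3). A trace with v = (1, 2, 30, 400):

    // tmp0 = vec_reve(v)            -> (400, 30, 2, 1)
    // tmp1 = v + tmp0               -> (401, 32, 32, 401)
    // tmp2 = vec_mergel(tmp1, tmp1) -> (32, 32, 401, 401)
    // tmp3 = tmp1 + tmp2            -> (433, 433, 433, 802); lane 0 = 433 = 1 + 2 + 30 + 400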
@@ -783,7 +797,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept
         {
-            return vec_sel(true_br.data, false_br.data, cond.data);
+            return vec_sel(false_br.data, true_br.data, cond.data);
         }
         template <class A, class T, bool... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept
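On the operand swap above: vec_sel(a, b, m) computes (a & ~m) | (b & m), taking bits from its second operand where the mask is set, so the true branch belongs in the second slot; the old order inverted every selection. A hypothetical scalar model:

    uint32_t sel(uint32_t false_v, uint32_t true_v, uint32_t m) { return (false_v & ~m) | (true_v & m); }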
@@ -844,14 +858,29 @@ namespace xsimd
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            XSIMD_IF_CONSTEXPR (N == batch<T, A>::size * sizeof(T))
+            {
+                return batch<T, A>(0);
+            }
+            else
+            {
+                auto slider = vec_splats((uint8_t)(8 * N));
+                return (typename batch<T, A>::register_type)vec_slo(x.data, slider);
+            }
         }
 
         // slide_right
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            XSIMD_IF_CONSTEXPR (N == batch<T, A>::size * sizeof(T))
+            {
+                return batch<T, A>(0);
+            }
+            else
+            {
+                return (typename batch<T, A>::register_type)vec_sro((__vector unsigned char)x.data, vec_splats((uint8_t)(8 * N)));
+            }
         }
 
         // sadd
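Two fixes in the slide kernels above. First, slide_left/slide_right are specified in bytes, but vec_sll/vec_srl shift by a bit count of 0-7 (taken from the low 3 bits of the shift vector), so the old code shifted by N bits modulo 8 rather than N bytes; vec_slo/vec_sro shift by whole octets, taking the count from bits 121:124 of the shift vector, which is why the byte count N is encoded as 8 * N before splatting. Second, that 4-bit field cannot represent a 16-byte shift (it wraps to 0), so sliding by the full register width is special-cased to return zero:

    // slide_left<4>: vec_slo(x, vec_splats((uint8_t)(8 * 4))) shifts the register by 4 whole bytes
    // slide_left<16>: the 4-bit shift field would wrap to 0, hence the explicit batch<T, A>(0)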
@@ -895,14 +924,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
-            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
-            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
-            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
-            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
+            *(typename batch<T, A>::register_type*)mem = self.data;
         }
 
         // sub
@@ -1064,14 +1086,14 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergeh(self.data, other.data);
+            return vec_mergel(self.data, other.data);
         }
 
         // zip_lo
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
        {
-            return vec_mergel(self.data, other.data);
+            return vec_mergeh(self.data, other.data);
         }
     }
 }
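Same mergeh/mergel mix-up as in complex_low/complex_high: xsimd's zip_lo interleaves the low (first) halves of the two batches, which on big-endian AltiVec is vec_mergeh, and zip_hi interleaves the high halves, vec_mergel. With 32-bit lanes:

    // a = (a0, a1, a2, a3), b = (b0, b1, b2, b3)
    // zip_lo(a, b) -> (a0, b0, a1, b1) == vec_mergeh(a, b)
    // zip_hi(a, b) -> (a2, b2, a3, b3) == vec_mergel(a, b)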