WIP

serge-sans-paille · serge-sans-paille · commit b1284c4bbc0c · 2025-07-26T16:02:30.000+02:00
diff --git a/include/xsimd/arch/common/xsimd_common_math.hpp b/include/xsimd/arch/common/xsimd_common_math.hpp
@@ -1087,7 +1087,7 @@ namespace xsimd
         template <class A, class T>
         XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<common>) noexcept
         {
-            return batch<T, A>(self.data) & batch<T, A>(1);
+            return batch<T, A>((typename batch<T, A>::register_type)self.data) & batch<T, A>(1);
         }
 
         // horner
diff --git a/include/xsimd/arch/xsimd_altivec.hpp b/include/xsimd/arch/xsimd_altivec.hpp
@@ -122,9 +122,16 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            constexpr auto nbit = 8 * sizeof(T) - 1;
-            auto adj = ((self ^ other) << nbit) >> nbit;
-            return avgr(self, other, A {}) - adj;
+            XSIMD_IF_CONSTEXPR(sizeof(T) < 8)
+            {
+                constexpr auto nbit = 8 * sizeof(T) - 1;
+                auto adj = bitwise_cast<T>(bitwise_cast<as_unsigned_integer_t<T>>((self ^ other) << nbit) >> nbit);
+                return avgr(self, other, A {}) - adj;
+            }
+            else
+            {
+                return avg(self, other, common {});
+            }
         }
         template <class A>
         XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
@@ -207,7 +214,14 @@ namespace xsimd
         {
             using shift_type = as_unsigned_integer_t<T>;
             batch<shift_type, A> shift(static_cast<shift_type>(other));
-            return vec_sr(self.data, shift.data);
+            XSIMD_IF_CONSTEXPR(std::is_signed<T>::value)
+            {
+                return vec_sra(self.data, shift.data);
+            }
+            else
+            {
+                return vec_sr(self.data, shift.data);
+            }
         }
 
         // bitwise_xor
@@ -226,7 +240,7 @@ namespace xsimd
         template <class A, class T_in, class T_out>
         XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<altivec>) noexcept
         {
-            return *reinterpret_cast<typename batch<T_out, A>::register_type const*>(&self.data);
+            return (typename batch<T_out, A>::register_type)(self.data);
         }
 
         // broadcast
@@ -243,43 +257,43 @@ namespace xsimd
             template <class A>
             XSIMD_INLINE batch<float, A> complex_low(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergel(self.real().data, self.imag().data);
+                return vec_mergeh(self.real().data, self.imag().data);
             }
             template <class A>
             XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergel(self.real().data, self.imag().data);
+                return vec_mergeh(self.real().data, self.imag().data);
             }
             // complex_high
             template <class A>
             XSIMD_INLINE batch<float, A> complex_high(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergeh(self.real().data, self.imag().data);
+                return vec_mergel(self.real().data, self.imag().data);
             }
             template <class A>
             XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept
             {
-                return vec_mergeh(self.real().data, self.imag().data);
+                return vec_mergel(self.real().data, self.imag().data);
             }
         }
 
         // decr_if
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
         {
-            return self + batch<T, A>(mask.data);
+            return self + batch<T, A>((typename batch<T, A>::register_type)mask.data);
         }
 
         // div
         template <class A>
         XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mul(self.data, vec_re(other.data));
+            return vec_div(self.data, other.data);
         }
         template <class A>
         XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mul(self.data, vec_re(other.data));
+            return vec_div(self.data, other.data);
         }
 
         // fast_cast
@@ -471,7 +485,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
         {
-            return self - batch<T, A>(mask.data);
+            return self - batch<T, A>((typename batch<T, A>::register_type)mask.data);
         }
 
         // insert
@@ -504,9 +518,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept
         {
-            auto lo = vec_ld(0, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
-            auto hi = vec_ld(16, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
-            return vec_perm(lo, hi, vec_lvsl(0, mem));
+            return *(typename batch<T, A>::register_type const*)mem;
         }
 
         // load_complex
@@ -515,7 +527,9 @@ namespace xsimd
             template <class A>
             XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<altivec>) noexcept
             {
-                return { vec_mergee(hi.data, lo.data), vec_mergeo(hi.data, lo.data) };
+                __vector unsigned char perme = { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
+                __vector unsigned char permo = { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
+                return { vec_perm(hi.data, lo.data, perme), vec_perm(hi.data, lo.data, permo) };
             }
             template <class A>
             XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<altivec>) noexcept
@@ -685,7 +699,7 @@ namespace xsimd
         {
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
@@ -694,7 +708,7 @@ namespace xsimd
         {
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
@@ -704,7 +718,7 @@ namespace xsimd
             // FIXME: find an in-order approach
             auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
             auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
@@ -783,7 +797,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept
         {
-            return vec_sel(true_br.data, false_br.data, cond.data);
+            return vec_sel(false_br.data, true_br.data, cond.data);
         }
         template <class A, class T, bool... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept
@@ -844,14 +858,29 @@ namespace xsimd
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            XSIMD_IF_CONSTEXPR(N == batch<T, A>::size * sizeof(T))
+            {
+                return batch<T, A>(0);
+            }
+            else
+            {
+                auto slider = vec_splats((uint8_t)(8 * N));
+                return (typename batch<T, A>::register_type)vec_slo(x.data, slider);
+            }
         }
 
         // slide_right
         template <size_t N, class A, class T>
         XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept
         {
-            return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));
+            XSIMD_IF_CONSTEXPR(N == batch<T, A>::size * sizeof(T))
+            {
+                return batch<T, A>(0);
+            }
+            else
+            {
+                return (typename batch<T, A>::register_type)vec_sro((__vector unsigned char)x.data, vec_splats((uint8_t)(8 * N)));
+            }
         }
 
         // sadd
@@ -895,14 +924,7 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept
         {
-            auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));
-            vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);
-            vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);
-            vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);
-            vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);
-            vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);
+            *(typename batch<T, A>::register_type*)mem = self.data;
         }
 
         // sub
@@ -1064,14 +1086,14 @@ namespace xsimd
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergeh(self.data, other.data);
+            return vec_mergel(self.data, other.data);
         }
 
         // zip_lo
         template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
-            return vec_mergel(self.data, other.data);
+            return vec_mergeh(self.data, other.data);
         }
     }
 }
diff --git a/test/test_batch_int.cpp b/test/test_batch_int.cpp
@@ -285,21 +285,15 @@ struct batch_int_test
         for (int32_t i = 0; i < s; ++i)
         {
             res = lhs << i;
-            value_type expected = value_type(1) << i;
-            for (std::size_t j = 0; j < size; ++j)
-            {
-                CHECK_EQ(res.get(j), expected);
-            }
+            batch_type expected(value_type(1) << i);
+            CHECK_BATCH_EQ(res, expected);
         }
         lhs = batch_type(std::numeric_limits<value_type>::max());
         for (int32_t i = 0; i < s; ++i)
         {
             res = lhs >> i;
-            value_type expected = std::numeric_limits<value_type>::max() >> i;
-            for (std::size_t j = 0; j < size; ++j)
-            {
-                CHECK_EQ(res.get(j), expected);
-            }
+            batch_type expected(std::numeric_limits<value_type>::max() >> i);
+            CHECK_BATCH_EQ(res, expected);
         }
     }
 

Original file line number	Diff line number	Diff line change
`@@ -1087,7 +1087,7 @@ namespace xsimd`
`1087`	`1087`	`template <class A, class T>`
`1088`	`1088`	`XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<common>) noexcept`
`1089`	`1089`	`{`
`1090`		`- return batch<T, A>(self.data) & batch<T, A>(1);`
	`1090`	`+ return batch<T, A>((typename batch<T, A>::register_type)self.data) & batch<T, A>(1);`
`1091`	`1091`	`}`
`1092`	`1092`
`1093`	`1093`	`// horner`
Original file line number	Diff line number	Diff line change
`@@ -122,9 +122,16 @@ namespace xsimd`
`122`	`122`	`template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>`
`123`	`123`	`XSIMD_INLINE batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept`
`124`	`124`	`{`
`125`		`- constexpr auto nbit = 8 * sizeof(T) - 1;`
`126`		`- auto adj = ((self ^ other) << nbit) >> nbit;`
`127`		`- return avgr(self, other, A {}) - adj;`
	`125`	`+ XSIMD_IF_CONSTEXPR(sizeof(T) < 8)`
	`126`	`+ {`
	`127`	`+ constexpr auto nbit = 8 * sizeof(T) - 1;`
	`128`	`+ auto adj = bitwise_cast<T>(bitwise_cast<as_unsigned_integer_t<T>>((self ^ other) << nbit) >> nbit);`
	`129`	`+ return avgr(self, other, A {}) - adj;`
	`130`	`+ }`
	`131`	`+ else`
	`132`	`+ {`
	`133`	`+ return avg(self, other, common {});`
	`134`	`+ }`
`128`	`135`	`}`
`129`	`136`	`template <class A>`
`130`	`137`	`XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept`
`@@ -207,7 +214,14 @@ namespace xsimd`
`207`	`214`	`{`
`208`	`215`	`using shift_type = as_unsigned_integer_t<T>;`
`209`	`216`	`batch<shift_type, A> shift(static_cast<shift_type>(other));`
`210`		`- return vec_sr(self.data, shift.data);`
	`217`	`+ XSIMD_IF_CONSTEXPR(std::is_signed<T>::value)`
	`218`	`+ {`
	`219`	`+ return vec_sra(self.data, shift.data);`
	`220`	`+ }`
	`221`	`+ else`
	`222`	`+ {`
	`223`	`+ return vec_sr(self.data, shift.data);`
	`224`	`+ }`
`211`	`225`	`}`
`212`	`226`
`213`	`227`	`// bitwise_xor`
`@@ -226,7 +240,7 @@ namespace xsimd`
`226`	`240`	`template <class A, class T_in, class T_out>`
`227`	`241`	`XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<altivec>) noexcept`
`228`	`242`	`{`
`229`		`- return *reinterpret_cast<typename batch<T_out, A>::register_type const*>(&self.data);`
	`243`	`+ return (typename batch<T_out, A>::register_type)(self.data);`
`230`	`244`	`}`
`231`	`245`
`232`	`246`	`// broadcast`
`@@ -243,43 +257,43 @@ namespace xsimd`
`243`	`257`	`template <class A>`
`244`	`258`	`XSIMD_INLINE batch<float, A> complex_low(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept`
`245`	`259`	`{`
`246`		`- return vec_mergel(self.real().data, self.imag().data);`
	`260`	`+ return vec_mergeh(self.real().data, self.imag().data);`
`247`	`261`	`}`
`248`	`262`	`template <class A>`
`249`	`263`	`XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept`
`250`	`264`	`{`
`251`		`- return vec_mergel(self.real().data, self.imag().data);`
	`265`	`+ return vec_mergeh(self.real().data, self.imag().data);`
`252`	`266`	`}`
`253`	`267`	`// complex_high`
`254`	`268`	`template <class A>`
`255`	`269`	`XSIMD_INLINE batch<float, A> complex_high(batch<std::complex<float>, A> const& self, requires_arch<altivec>) noexcept`
`256`	`270`	`{`
`257`		`- return vec_mergeh(self.real().data, self.imag().data);`
	`271`	`+ return vec_mergel(self.real().data, self.imag().data);`
`258`	`272`	`}`
`259`	`273`	`template <class A>`
`260`	`274`	`XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<altivec>) noexcept`
`261`	`275`	`{`
`262`		`- return vec_mergeh(self.real().data, self.imag().data);`
	`276`	`+ return vec_mergel(self.real().data, self.imag().data);`
`263`	`277`	`}`
`264`	`278`	`}`
`265`	`279`
`266`	`280`	`// decr_if`
`267`	`281`	`template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>`
`268`	`282`	`XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept`
`269`	`283`	`{`
`270`		`- return self + batch<T, A>(mask.data);`
	`284`	`+ return self + batch<T, A>((typename batch<T, A>::register_type)mask.data);`
`271`	`285`	`}`
`272`	`286`
`273`	`287`	`// div`
`274`	`288`	`template <class A>`
`275`	`289`	`XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept`
`276`	`290`	`{`
`277`		`- return vec_mul(self.data, vec_re(other.data));`
	`291`	`+ return vec_div(self.data, other.data);`
`278`	`292`	`}`
`279`	`293`	`template <class A>`
`280`	`294`	`XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept`
`281`	`295`	`{`
`282`		`- return vec_mul(self.data, vec_re(other.data));`
	`296`	`+ return vec_div(self.data, other.data);`
`283`	`297`	`}`
`284`	`298`
`285`	`299`	`// fast_cast`
`@@ -471,7 +485,7 @@ namespace xsimd`
`471`	`485`	`template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>`
`472`	`486`	`XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept`
`473`	`487`	`{`
`474`		`- return self - batch<T, A>(mask.data);`
	`488`	`+ return self - batch<T, A>((typename batch<T, A>::register_type)mask.data);`
`475`	`489`	`}`
`476`	`490`
`477`	`491`	`// insert`
`@@ -504,9 +518,7 @@ namespace xsimd`
`504`	`518`	`template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`505`	`519`	`XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<altivec>) noexcept`
`506`	`520`	`{`
`507`		`- auto lo = vec_ld(0, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));`
`508`		`- auto hi = vec_ld(16, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));`
`509`		`- return vec_perm(lo, hi, vec_lvsl(0, mem));`
	`521`	`+ return *(typename batch<T, A>::register_type const*)mem;`
`510`	`522`	`}`
`511`	`523`
`512`	`524`	`// load_complex`
`@@ -515,7 +527,9 @@ namespace xsimd`
`515`	`527`	`template <class A>`
`516`	`528`	`XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<altivec>) noexcept`
`517`	`529`	`{`
`518`		`- return { vec_mergee(hi.data, lo.data), vec_mergeo(hi.data, lo.data) };`
	`530`	`+ __vector unsigned char perme = { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };`
	`531`	`+ __vector unsigned char permo = { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };`
	`532`	`+ return { vec_perm(hi.data, lo.data, perme), vec_perm(hi.data, lo.data, permo) };`
`519`	`533`	`}`
`520`	`534`	`template <class A>`
`521`	`535`	`XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<altivec>) noexcept`
`@@ -685,7 +699,7 @@ namespace xsimd`
`685`	`699`	`{`
`686`	`700`	`auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0`
`687`	`701`	`auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0`
`688`		`- auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
	`702`	`+ auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
`689`	`703`	`auto tmp3 = vec_add(tmp1, tmp2);`
`690`	`704`	`return vec_extract(tmp3, 0);`
`691`	`705`	`}`
`@@ -694,7 +708,7 @@ namespace xsimd`
`694`	`708`	`{`
`695`	`709`	`auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0`
`696`	`710`	`auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0`
`697`		`- auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
	`711`	`+ auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
`698`	`712`	`auto tmp3 = vec_add(tmp1, tmp2);`
`699`	`713`	`return vec_extract(tmp3, 0);`
`700`	`714`	`}`
`@@ -704,7 +718,7 @@ namespace xsimd`
`704`	`718`	`// FIXME: find an in-order approach`
`705`	`719`	`auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0`
`706`	`720`	`auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0`
`707`		`- auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
	`721`	`+ auto tmp2 = vec_mergel(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0`
`708`	`722`	`auto tmp3 = vec_add(tmp1, tmp2);`
`709`	`723`	`return vec_extract(tmp3, 0);`
`710`	`724`	`}`
`@@ -783,7 +797,7 @@ namespace xsimd`
`783`	`797`	`template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`784`	`798`	`XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept`
`785`	`799`	`{`
`786`		`- return vec_sel(true_br.data, false_br.data, cond.data);`
	`800`	`+ return vec_sel(false_br.data, true_br.data, cond.data);`
`787`	`801`	`}`
`788`	`802`	`template <class A, class T, bool... Values, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`789`	`803`	`XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<altivec>) noexcept`
`@@ -844,14 +858,29 @@ namespace xsimd`
`844`	`858`	`template <size_t N, class A, class T>`
`845`	`859`	`XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<altivec>) noexcept`
`846`	`860`	`{`
`847`		`- return (typename batch<T, A>::register_type)vec_sll((__vector unsigned char)x.data, vec_splats((uint32_t)N));`
	`861`	`+ XSIMD_IF_CONSTEXPR(N == batch<T, A>::size * sizeof(T))`
	`862`	`+ {`
	`863`	`+ return batch<T, A>(0);`
	`864`	`+ }`
	`865`	`+ else`
	`866`	`+ {`
	`867`	`+ auto slider = vec_splats((uint8_t)(8 * N));`
	`868`	`+ return (typename batch<T, A>::register_type)vec_slo(x.data, slider);`
	`869`	`+ }`
`848`	`870`	`}`
`849`	`871`
`850`	`872`	`// slide_right`
`851`	`873`	`template <size_t N, class A, class T>`
`852`	`874`	`XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<altivec>) noexcept`
`853`	`875`	`{`
`854`		`- return (typename batch<T, A>::register_type)vec_srl((__vector unsigned char)x.data, vec_splats((uint32_t)N));`
	`876`	`+ XSIMD_IF_CONSTEXPR(N == batch<T, A>::size * sizeof(T))`
	`877`	`+ {`
	`878`	`+ return batch<T, A>(0);`
	`879`	`+ }`
	`880`	`+ else`
	`881`	`+ {`
	`882`	`+ return (typename batch<T, A>::register_type)vec_sro((__vector unsigned char)x.data, vec_splats((uint8_t)(8 * N)));`
	`883`	`+ }`
`855`	`884`	`}`
`856`	`885`
`857`	`886`	`// sadd`
`@@ -895,14 +924,7 @@ namespace xsimd`
`895`	`924`	`template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`896`	`925`	`XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<altivec>) noexcept`
`897`	`926`	`{`
`898`		`- auto tmp = vec_perm(*reinterpret_cast<const __vector unsigned char*>(&self.data), *reinterpret_cast<const __vector unsigned char*>(&self.data), vec_lvsr(0, (unsigned char*)mem));`
`899`		`- vec_ste((__vector unsigned char)tmp, 0, (unsigned char*)mem);`
`900`		`- vec_ste((__vector unsigned short)tmp, 1, (unsigned short*)mem);`
`901`		`- vec_ste((__vector unsigned int)tmp, 3, (unsigned int*)mem);`
`902`		`- vec_ste((__vector unsigned int)tmp, 4, (unsigned int*)mem);`
`903`		`- vec_ste((__vector unsigned int)tmp, 8, (unsigned int*)mem);`
`904`		`- vec_ste((__vector unsigned int)tmp, 12, (unsigned int*)mem);`
`905`		`- vec_ste((__vector unsigned short)tmp, 14, (unsigned short*)mem);`
	`927`	`+ *(typename batch<T, A>::register_type*)mem = self.data;`
`906`	`928`	`}`
`907`	`929`
`908`	`930`	`// sub`
`@@ -1064,14 +1086,14 @@ namespace xsimd`
`1064`	`1086`	`template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`1065`	`1087`	`XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept`
`1066`	`1088`	`{`
`1067`		`- return vec_mergeh(self.data, other.data);`
	`1089`	`+ return vec_mergel(self.data, other.data);`
`1068`	`1090`	`}`
`1069`	`1091`
`1070`	`1092`	`// zip_lo`
`1071`	`1093`	`template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>`
`1072`	`1094`	`XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept`
`1073`	`1095`	`{`
`1074`		`- return vec_mergel(self.data, other.data);`
	`1096`	`+ return vec_mergeh(self.data, other.data);`
`1075`	`1097`	`}`
`1076`	`1098`	`}`
`1077`	`1099`	`}`
Original file line number	Diff line number	Diff line change
`@@ -285,21 +285,15 @@ struct batch_int_test`
`285`	`285`	`for (int32_t i = 0; i < s; ++i)`
`286`	`286`	`{`
`287`	`287`	`res = lhs << i;`
`288`		`- value_type expected = value_type(1) << i;`
`289`		`- for (std::size_t j = 0; j < size; ++j)`
`290`		`- {`
`291`		`- CHECK_EQ(res.get(j), expected);`
`292`		`- }`
	`288`	`+ batch_type expected(value_type(1) << i);`
	`289`	`+ CHECK_BATCH_EQ(res, expected);`
`293`	`290`	`}`
`294`	`291`	`lhs = batch_type(std::numeric_limits<value_type>::max());`
`295`	`292`	`for (int32_t i = 0; i < s; ++i)`
`296`	`293`	`{`
`297`	`294`	`res = lhs >> i;`
`298`		`- value_type expected = std::numeric_limits<value_type>::max() >> i;`
`299`		`- for (std::size_t j = 0; j < size; ++j)`
`300`		`- {`
`301`		`- CHECK_EQ(res.get(j), expected);`
`302`		`- }`
	`295`	`+ batch_type expected(std::numeric_limits<value_type>::max() >> i);`
	`296`	`+ CHECK_BATCH_EQ(res, expected);`
`303`	`297`	`}`
`304`	`298`	`}`
`305`	`299`