@@ -61,11 +61,11 @@ namespace xsimd
         XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<common>) noexcept;
         template <class A, typename T, typename ITy, ITy... Indices>
         XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<common>) noexcept;
+#endif
         template <class A, class T>
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
         template <class A, class T>
         XSIMD_INLINE batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
-#endif

         // abs
         template <class A>
@@ -102,11 +102,21 @@ namespace xsimd
         }

         // avgr
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && sizeof(T) < 8, void>::type>
         XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
         {
             return vec_avg(self.data, other.data);
         }
+        template <class A>
+        XSIMD_INLINE batch<float, A> avgr(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
+        {
+            return avgr(self, other, common {});
+        }
+        template <class A>
+        XSIMD_INLINE batch<double, A> avgr(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
+        {
+            return avgr(self, other, common {});
+        }

         // avg
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -116,6 +126,16 @@ namespace xsimd
             auto adj = ((self ^ other) << nbit) >> nbit;
             return avgr(self, other, A {}) - adj;
         }
+        template <class A>
+        XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
+        {
+            return avg(self, other, common {});
+        }
+        template <class A>
+        XSIMD_INLINE batch<double, A> avg(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
+        {
+            return avg(self, other, common {});
+        }

         // batch_bool_cast
         template <class A, class T_out, class T_in>
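For unsigned elements, the integer avg kernel in the hunk above relies on the identity floor((a + b) / 2) = floor((a + b + 1) / 2) - ((a ^ b) & 1), where (a ^ b) & 1 is the low bit of a + b: vec_avg supplies the rounded average and the adj term removes the rounding. A minimal scalar check of that identity (illustration only, not part of the patch, and restricted to unsigned values; the new float and double overloads skip the trick entirely and forward to the common implementation):

#include <cassert>
#include <cstdint>

int main()
{
    for (std::uint32_t a = 0; a < 256; ++a)
        for (std::uint32_t b = 0; b < 256; ++b)
        {
            std::uint32_t avgr = (a + b + 1) >> 1; // rounded average, what vec_avg computes
            std::uint32_t adj = (a ^ b) & 1;       // low bit of a + b: 1 exactly when the sum is odd
            assert(((a + b) >> 1) == avgr - adj);  // truncating average recovered from the rounded one
        }
    return 0;
}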
@@ -439,6 +459,14 @@ namespace xsimd
             return vec_add(tmp6, tmp7);
         }

+        template <class A>
+        XSIMD_INLINE batch<double, A> haddp(batch<double, A> const* row, requires_arch<altivec>) noexcept
+        {
+            auto tmp0 = vec_mergee(row[0].data, row[1].data); // v00 v10
+            auto tmp1 = vec_mergeo(row[0].data, row[1].data); // v01 v11
+            return vec_add(tmp0, tmp1);
+        }
+
         // incr_if
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
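As a reading aid for the new double haddp kernel: with two 2-lane rows r0 = {a0, a1} and r1 = {b0, b1}, vec_mergee picks the even lanes {a0, b0}, vec_mergeo picks the odd lanes {a1, b1}, and their sum is {a0 + a1, b0 + b1}, i.e. one horizontal sum per input row. A scalar model of the intended result (hypothetical helper for illustration, not xsimd API):

#include <array>

// Scalar reference for the 2-lane double haddp: lane i of the result is the
// horizontal sum of row i.
std::array<double, 2> haddp2_ref(std::array<double, 2> const& r0,
                                 std::array<double, 2> const& r1)
{
    return { r0[0] + r0[1], r1[0] + r1[1] };
}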
@@ -652,21 +680,23 @@ namespace xsimd
         }

         // reduce_add
-        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
-        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
+        template <class A>
+        XSIMD_INLINE signed reduce_add(batch<signed, A> const& self, requires_arch<altivec>) noexcept
         {
-            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
-            {
-                auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
-                auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
-                auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
-                auto tmp3 = vec_add(tmp1, tmp2);
-                return vec_extract(tmp3, 0);
-            }
-            else
-            {
-                return hadd(self, common {});
-            }
+            auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
+            auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
+            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp3 = vec_add(tmp1, tmp2);
+            return vec_extract(tmp3, 0);
+        }
+        template <class A>
+        XSIMD_INLINE unsigned reduce_add(batch<unsigned, A> const& self, requires_arch<altivec>) noexcept
+        {
+            auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
+            auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
+            auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
+            auto tmp3 = vec_add(tmp1, tmp2);
+            return vec_extract(tmp3, 0);
         }
         template <class A>
         XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<altivec>) noexcept
@@ -678,6 +708,18 @@ namespace xsimd
             auto tmp3 = vec_add(tmp1, tmp2);
             return vec_extract(tmp3, 0);
         }
+        template <class A>
+        XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<altivec>) noexcept
+        {
+            auto tmp0 = vec_reve(self.data); // v1, v0
+            auto tmp1 = vec_add(self.data, tmp0); // v0 + v1, v1 + v0
+            return vec_extract(tmp1, 0);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
+        {
+            return reduce_add(self, common {});
+        }

 #if 0
         // reduce_max
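A minimal usage sketch for the reworked reduce_add overloads, assuming a 128-bit altivec/VSX target where batch<float> has 4 lanes and batch<double> has 2 (the lane counts are an assumption of this sketch, not stated in the patch):

#include <cassert>
#include <xsimd/xsimd.hpp>

int main()
{
    // 4 single-precision lanes on a 128-bit register
    xsimd::batch<float> f(1.0f, 2.0f, 3.0f, 4.0f);
    assert(xsimd::reduce_add(f) == 10.0f);

    // 2 double-precision lanes; with this patch the sum is computed by the
    // dedicated double kernel rather than the common fallback
    xsimd::batch<double> d(1.5, 2.5);
    assert(xsimd::reduce_add(d) == 4.0);
    return 0;
}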