Skip to content

Commit 82bb1c3

Browse files
WIP
1 parent 49fc69f commit 82bb1c3

File tree

2 files changed

+70
-17
lines changed

2 files changed

+70
-17
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ namespace xsimd
6161
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<common>) noexcept;
6262
template <class A, typename T, typename ITy, ITy... Indices>
6363
XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<common>) noexcept;
64+
#endif
6465
template <class A, class T>
6566
XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
6667
template <class A, class T>
6768
XSIMD_INLINE batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
68-
#endif
6969

7070
// abs
7171
template <class A>
@@ -102,11 +102,21 @@ namespace xsimd
102102
}
103103

104104
// avgr
105-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
105+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && sizeof(T) < 8, void>::type>
106106
XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
107107
{
108108
return vec_avg(self.data, other.data);
109109
}
110+
template <class A>
111+
XSIMD_INLINE batch<float, A> avgr(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
112+
{
113+
return avgr(self, other, common {});
114+
}
115+
template <class A>
116+
XSIMD_INLINE batch<double, A> avgr(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
117+
{
118+
return avgr(self, other, common {});
119+
}
110120

111121
// avg
112122
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -116,6 +126,16 @@ namespace xsimd
116126
auto adj = ((self ^ other) << nbit) >> nbit;
117127
return avgr(self, other, A {}) - adj;
118128
}
129+
template <class A>
130+
XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
131+
{
132+
return avg(self, other, common {});
133+
}
134+
template <class A>
135+
XSIMD_INLINE batch<double, A> avg(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
136+
{
137+
return avg(self, other, common {});
138+
}
119139

120140
// batch_bool_cast
121141
template <class A, class T_out, class T_in>
@@ -439,6 +459,14 @@ namespace xsimd
439459
return vec_add(tmp6, tmp7);
440460
}
441461

462+
template <class A>
463+
XSIMD_INLINE batch<double, A> haddp(batch<double, A> const* row, requires_arch<altivec>) noexcept
464+
{
465+
auto tmp0 = vec_mergee(row[0].data, row[1].data); // v00 v10 v02 v12
466+
auto tmp1 = vec_mergeo(row[0].data, row[1].data); // v01 v11 v03 v13
467+
return vec_add(tmp0, tmp1);
468+
}
469+
442470
// incr_if
443471
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
444472
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
@@ -652,21 +680,23 @@ namespace xsimd
652680
}
653681

654682
// reduce_add
655-
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
656-
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
683+
template <class A>
684+
XSIMD_INLINE signed reduce_add(batch<signed, A> const& self, requires_arch<altivec>) noexcept
657685
{
658-
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
659-
{
660-
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
661-
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
662-
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
663-
auto tmp3 = vec_add(tmp1, tmp2);
664-
return vec_extract(tmp3, 0);
665-
}
666-
else
667-
{
668-
return hadd(self, common {});
669-
}
686+
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
687+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
688+
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
689+
auto tmp3 = vec_add(tmp1, tmp2);
690+
return vec_extract(tmp3, 0);
691+
}
692+
template <class A>
693+
XSIMD_INLINE unsigned reduce_add(batch<unsigned, A> const& self, requires_arch<altivec>) noexcept
694+
{
695+
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
696+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
697+
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
698+
auto tmp3 = vec_add(tmp1, tmp2);
699+
return vec_extract(tmp3, 0);
670700
}
671701
template <class A>
672702
XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<altivec>) noexcept
@@ -678,6 +708,18 @@ namespace xsimd
678708
auto tmp3 = vec_add(tmp1, tmp2);
679709
return vec_extract(tmp3, 0);
680710
}
711+
template <class A>
712+
XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<altivec>) noexcept
713+
{
714+
auto tmp0 = vec_reve(self.data); // v1, v0
715+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v1, v1 + v0
716+
return vec_extract(tmp1, 0);
717+
}
718+
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
719+
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
720+
{
721+
return reduce_add(self, common {});
722+
}
681723

682724
#if 0
683725
// reduce_max

include/xsimd/config/xsimd_config.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,12 +413,23 @@
413413
*
414414
* Set to 1 if Altivec is available at compile-time, to 0 otherwise.
415415
*/
416-
#ifdef __VEC__
416+
#if defined(__VEC__)
417417
#define XSIMD_WITH_ALTIVEC 1
418418
#else
419419
#define XSIMD_WITH_ALTIVEC 0
420420
#endif
421421

422+
/**
423+
* @ingroup xsimd_config_macro
424+
*
425+
* Set to 1 if Vector Scalar eXtension is available at compile-time, to 0 otherwise.
426+
*/
427+
#if defined(__VSX__)
428+
#define XSIMD_WITH_VSX 1
429+
#else
430+
#define XSIMD_WITH_VSX 0
431+
#endif
432+
422433
// Workaround for MSVC compiler
423434
#ifdef _MSC_VER
424435

0 commit comments

Comments (0)