Skip to content

Commit 82bb1c3

Browse files
WIP
1 parent 49fc69f commit 82bb1c3

File tree

2 files changed

+70
-17
lines changed

2 files changed

+70
-17
lines changed

include/xsimd/arch/xsimd_altivec.hpp

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ namespace xsimd
6161
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<common>) noexcept;
6262
template <class A, typename T, typename ITy, ITy... Indices>
6363
XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<common>) noexcept;
64+
#endif
6465
template <class A, class T>
6566
XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
6667
template <class A, class T>
6768
XSIMD_INLINE batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
68-
#endif
6969

7070
// abs
7171
template <class A>
@@ -102,11 +102,21 @@ namespace xsimd
102102
}
103103

104104
// avgr
105-
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
105+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && sizeof(T) < 8, void>::type>
106106
XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<altivec>) noexcept
107107
{
108108
return vec_avg(self.data, other.data);
109109
}
110+
template <class A>
111+
XSIMD_INLINE batch<float, A> avgr(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
112+
{
113+
return avgr(self, other, common {});
114+
}
115+
template <class A>
116+
XSIMD_INLINE batch<double, A> avgr(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
117+
{
118+
return avgr(self, other, common {});
119+
}
110120

111121
// avg
112122
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -116,6 +126,16 @@ namespace xsimd
116126
auto adj = ((self ^ other) << nbit) >> nbit;
117127
return avgr(self, other, A {}) - adj;
118128
}
129+
template <class A>
130+
XSIMD_INLINE batch<float, A> avg(batch<float, A> const& self, batch<float, A> const& other, requires_arch<altivec>) noexcept
131+
{
132+
return avg(self, other, common {});
133+
}
134+
template <class A>
135+
XSIMD_INLINE batch<double, A> avg(batch<double, A> const& self, batch<double, A> const& other, requires_arch<altivec>) noexcept
136+
{
137+
return avg(self, other, common {});
138+
}
119139

120140
// batch_bool_cast
121141
template <class A, class T_out, class T_in>
@@ -439,6 +459,14 @@ namespace xsimd
439459
return vec_add(tmp6, tmp7);
440460
}
441461

462+
template <class A>
463+
XSIMD_INLINE batch<double, A> haddp(batch<double, A> const* row, requires_arch<altivec>) noexcept
464+
{
465+
auto tmp0 = vec_mergee(row[0].data, row[1].data); // v00 v10 v02 v12
466+
auto tmp1 = vec_mergeo(row[0].data, row[1].data); // v01 v11 v03 v13
467+
return vec_add(tmp0, tmp1);
468+
}
469+
442470
// incr_if
443471
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
444472
XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<altivec>) noexcept
@@ -652,21 +680,23 @@ namespace xsimd
652680
}
653681

654682
// reduce_add
655-
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
656-
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
683+
template <class A>
684+
XSIMD_INLINE signed reduce_add(batch<signed, A> const& self, requires_arch<altivec>) noexcept
657685
{
658-
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
659-
{
660-
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
661-
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
662-
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
663-
auto tmp3 = vec_add(tmp1, tmp2);
664-
return vec_extract(tmp3, 0);
665-
}
666-
else
667-
{
668-
return hadd(self, common {});
669-
}
686+
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
687+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
688+
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
689+
auto tmp3 = vec_add(tmp1, tmp2);
690+
return vec_extract(tmp3, 0);
691+
}
692+
template <class A>
693+
XSIMD_INLINE unsigned reduce_add(batch<unsigned, A> const& self, requires_arch<altivec>) noexcept
694+
{
695+
auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
696+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v3, v1 + v2, v2 + v1, v3 + v0
697+
auto tmp2 = vec_mergeh(tmp1, tmp1); // v2 + v1, v2 + v1, v3 + v0, v3 + v0
698+
auto tmp3 = vec_add(tmp1, tmp2);
699+
return vec_extract(tmp3, 0);
670700
}
671701
template <class A>
672702
XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<altivec>) noexcept
@@ -678,6 +708,18 @@ namespace xsimd
678708
auto tmp3 = vec_add(tmp1, tmp2);
679709
return vec_extract(tmp3, 0);
680710
}
711+
template <class A>
712+
XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<altivec>) noexcept
713+
{
714+
auto tmp0 = vec_reve(self.data); // v1, v0
715+
auto tmp1 = vec_add(self.data, tmp0); // v0 + v1, v1 + v0
716+
return vec_extract(tmp1, 0);
717+
}
718+
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
719+
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<altivec>) noexcept
720+
{
721+
return reduce_add(self, common {});
722+
}
681723

682724
#if 0
683725
// reduce_max

include/xsimd/config/xsimd_config.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,12 +413,23 @@
413413
*
414414
* Set to 1 if Altivec is available at compile-time, to 0 otherwise.
415415
*/
416-
#ifdef __VEC__
416+
#if defined(__VEC__)
417417
#define XSIMD_WITH_ALTIVEC 1
418418
#else
419419
#define XSIMD_WITH_ALTIVEC 0
420420
#endif
421421

422+
/**
423+
* @ingroup xsimd_config_macro
424+
*
425+
* Set to 1 if Vector Scalar eXtension is available at compile-time, to 0 otherwise.
426+
*/
427+
#if defined(__VSX__)
428+
#define XSIMD_WITH_VSX 1
429+
#else
430+
#define XSIMD_WITH_VSX 0
431+
#endif
432+
422433
// Workaround for MSVC compiler
423434
#ifdef _MSC_VER
424435

0 commit comments

Comments (0)