WIP

serge-sans-paille · serge-sans-paille · commit 5451915e099f · 2025-10-31T23:43:09.000+01:00
diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp
@@ -20,6 +20,7 @@
 
 #include "../types/xsimd_neon_register.hpp"
 #include "../types/xsimd_utils.hpp"
+#include "./common/xsimd_common_cast.hpp"
 
 // Wrap intrinsics so we can pass them as function pointers
 // - OP: intrinsics name prefix, e.g., vorrq
@@ -3211,6 +3212,19 @@ namespace xsimd
         {
             return vreinterpretq_f32_u32(swizzle(batch<uint32_t, A>(vreinterpretq_u32_f32(self)), mask, A {}));
         }
+
+        /*********
+         * widen *
+         *********/
+        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0> // overload constrained by enable_sized_signed_t<T, 1> (presumably signed 1-byte T)
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept { // returns { widened low half of x, widened high half of x }
+          return {batch<widen_t<T>, A>(vmovl_s8(vget_low_s8(x))), batch<widen_t<T>, A>(vmovl_s8(vget_high_s8(x)))}; // vmovl_s8 sign-extends each 8-bit lane to 16 bits; no zero vector needed
+        }
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0> // overload constrained by enable_sized_unsigned_t<T, 1> (presumably unsigned 1-byte T)
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept { // returns { widened low half of x, widened high half of x }
+          return {batch<widen_t<T>, A>(vmovl_u8(vget_low_u8(x))), batch<widen_t<T>, A>(vmovl_u8(vget_high_u8(x)))}; // vmovl_u8 zero-extends each 8-bit lane to 16 bits; no zero vector needed
+        }
+
     }
 
 }
diff --git a/include/xsimd/arch/xsimd_neon64.hpp b/include/xsimd/arch/xsimd_neon64.hpp
@@ -1552,7 +1552,7 @@ namespace xsimd
          * widen *
          *********/
         template <class A, class T>
-        XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<neon>) noexcept
+        XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<neon64>) noexcept
         {
             return { batch<double, A>(vcvt_f64_f32(vget_low_f32(x))), batch<double, A>(vcvt_high_f64_f32(x)) };
         }

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`
`21`	`21`	`#include "../types/xsimd_neon_register.hpp"`
`22`	`22`	`#include "../types/xsimd_utils.hpp"`
	`23`	`+#include "./common/xsimd_common_cast.hpp"`
`23`	`24`
`24`	`25`	`// Wrap intrinsics so we can pass them as function pointers`
`25`	`26`	`// - OP: intrinsics name prefix, e.g., vorrq`
`@@ -3211,6 +3212,19 @@ namespace xsimd`
`3211`	`3212`	`{`
`3212`	`3213`	`return vreinterpretq_f32_u32(swizzle(batch<uint32_t, A>(vreinterpretq_u32_f32(self)), mask, A {}));`
`3213`	`3214`	`}`
	`3215`	`+`
	`3216`	`+ /*********`
	`3217`	`+ * widen *`
	`3218`	`+ *********/`
	`3219`	`+ template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>`
	`3220`	`+ XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept {`
	`3221`	`+ return {batch<widen_t<T>, A>(vaddl_s8(vget_low_s8(x), vdup_n_s8(0))), batch<widen_t<T>, A>(vaddl_s8(vget_high_s8(x), vdup_n_s8(0)))};`
	`3222`	`+ }`
	`3223`	`+ template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>`
	`3224`	`+ XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept {`
	`3225`	`+ return {batch<widen_t<T>, A>(vaddl_u8(vget_low_u8(x), vdup_n_u8(0))), batch<widen_t<T>, A>(vaddl_u8(vget_high_u8(x), vdup_n_u8(0)))};`
	`3226`	`+ }`
	`3227`	`+`
`3214`	`3228`	`}`
`3215`	`3229`
`3216`	`3230`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1552,7 +1552,7 @@ namespace xsimd`
`1552`	`1552`	`* widen *`
`1553`	`1553`	`*********/`
`1554`	`1554`	`template <class A, class T>`
`1555`		`- XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<neon>) noexcept`
	`1555`	`+ XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<neon64>) noexcept`
`1556`	`1556`	`{`
`1557`	`1557`	`return { batch<double, A>(vcvt_f64_f32(vget_low_f32(x))), batch<double, A>(vcvt_high_f64_f32(x)) };`
`1558`	`1558`	`}`