@@ -731,14 +731,28 @@ namespace xsimd
731731 XSIMD_INLINE void store (batch_bool<T, A> b, bool * mem, requires_arch<neon>) noexcept
732732 {
733733 uint8x16_t val = vsubq_u8 (vdupq_n_u8 (0 ), b.data );
734- vst1q_u8 ((unsigned char *)mem, val);
734+ vst1q_u8 ((uint8_t *)mem, val);
735735 }
736736
737- // template <class A>
738- // XSIMD_INLINE void store(batch_bool<float, A> b, bool* mem, requires_arch<sse2>) noexcept
739- // {
740- // store(batch_bool<uint32_t, A>(b.data), mem, A {});
741- // }
737+ template <class T , class A , detail::enable_sized_t <T, 2 > = 0 >
738+ XSIMD_INLINE void store (batch_bool<T, A> b, bool * mem, requires_arch<neon>) noexcept
739+ {
740+ uint8x8_t val = vsub_u8 (vdup_n_u8 (0 ), vqmovn_u16 (b.data ));
741+ vst1_u8 ((uint8_t *)mem, val);
742+ }
743+
744+ template <class T , class A , detail::enable_sized_t <T, 4 > = 0 >
745+ XSIMD_INLINE void store (batch_bool<T, A> b, bool * mem, requires_arch<neon>) noexcept
746+ {
747+ uint8x8_t val = vsub_u8 (vdup_n_u8 (0 ), vqmovn_u16 (vcombine_u16 (vqmovn_u32 (b.data ), vdup_n_u16 (0 ))));
748+ vst1_lane_u32 ((uint32_t *)mem, vreinterpret_u32_u8 (val), 0 );
749+ }
750+
751+ template <class A >
752+ XSIMD_INLINE void store (batch_bool<float , A> b, bool * mem, requires_arch<neon>) noexcept
753+ {
754+ store (batch_bool<uint32_t , A>(b.data ), mem, A {});
755+ }
742756
743757 /* ******
744758 * neg *
0 commit comments