@@ -573,6 +573,52 @@ namespace xsimd
573573 return vld1q_f32 (src);
574574 }
575575
/* batch bool version */

// Load 16 packed bools (one byte each, holding 0 or 1) into a byte-sized
// batch_bool<T, A>: each bool byte is expanded into a full 8-bit lane mask.
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
{
    // Read the bool bytes as unsigned chars; each lane then holds 0 or 1.
    auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, A {});
    // 0 - 1 wraps to 0xFF and 0 - 0 stays 0x00: turns each bool into an
    // all-ones / all-zeros lane mask, as batch_bool expects.
    return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
}
// Aligned variant for byte-sized elements: a packed bool array has no
// alignment requirement beyond one byte, so forward to the unaligned load.
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
{
    return load_unaligned(mem, t, r);
}
588+
// Load 8 packed bools and widen each into a 16-bit lane mask.
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
{
    // vld1_u8 reads the 8 bool bytes; vmovl_u8 zero-extends them to u16 lanes.
    batch<unsigned short, neon> vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
    // 0 - {0,1} yields {0x0000, 0xFFFF} per lane.
    return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
}
595+
// Aligned variant for 16-bit elements: packed bools need no extra alignment,
// so forward to the unaligned load.
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
{
    return load_unaligned(mem, t, r);
}
601+
// Load 4 packed bools and widen each into a 32-bit lane mask.
template <class T, class A, detail::enable_sized_t<T, 4> = 0>
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
{
    // Gather the 4 bool bytes as one 32-bit word into lane 0 of a u32x2.
    // NOTE(review): the type-punned read assumes sizeof(bool) == 1 and that a
    // 4-byte load from the bool array is acceptable here — confirm against
    // the other architectures' implementations of this overload.
    uint32x2_t tmp = vset_lane_u32(*(unsigned int const*)mem, vdup_n_u32(0), 0);
    // Reinterpret the word back to bytes, then zero-extend u8 -> u16 -> u32.
    // (vmovl_u8 takes uint8x8_t, so the u32x2 must be reinterpreted first.)
    batch<unsigned int, neon> vmem = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(tmp))));
    // 0 - {0,1} yields {0x00000000, 0xFFFFFFFF} per lane.
    return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
}
608+
// Aligned variant for 32-bit elements: packed bools need no extra alignment,
// so forward to the unaligned load.
template <class T, class A, detail::enable_sized_t<T, 4> = 0>
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
{
    return load_unaligned(mem, t, r);
}
614+
// float lane masks are 32 bits wide, so this mirrors the 4-byte integer
// case. It is a separate overload because make_unsigned<float> is invalid.
// (Fixed: the template head carried a stray "= 0" from a removed
// enable_sized_t parameter, and vmovl_u8 was fed a uint32x2_t directly.)
template <class A>
XSIMD_INLINE batch_bool<float, A> load_aligned(bool const* mem, batch_bool<float, A>, requires_arch<neon>) noexcept
{
    // Gather the 4 bool bytes as one 32-bit word into lane 0 of a u32x2.
    uint32x2_t tmp = vset_lane_u32(*(unsigned int const*)mem, vdup_n_u32(0), 0);
    // Reinterpret to bytes, then zero-extend u8 -> u16 -> u32.
    batch<unsigned int, A> vmem = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(tmp))));
    // 0 - {0,1} yields {0x00000000, 0xFFFFFFFF}; reuse the raw register as
    // the float mask.
    return batch_bool<float, A>((0 - vmem).data);
}
621+
/*********
 * store *
 *********/
0 commit comments