Skip to content

Commit 0c98b26

Browse files
Optimize loading of batch_bool from memory on arm
Use the same approach as #1172
1 parent 005f629 commit 0c98b26

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,34 @@ namespace xsimd
573573
return vld1q_f32(src);
574574
}
575575

576+
/* batch bool version */
577+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
578+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
579+
{
580+
auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, T {});
581+
return bitwise_cast<T>(0 - vmem);
582+
}
583+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
584+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
585+
{
586+
auto vmem = load_aligned<A>((unsigned char const*)mem, convert<unsigned char> {}, T {});
587+
return bitwise_cast<T>(0 - vmem);
588+
}
589+
590+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
591+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
592+
{
593+
auto vmem = vmovl_u8(vld1_u8((unsigned char const*)mem);
594+
return bitwise_cast<T>(0 - vmem);
595+
}
596+
597+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
598+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
599+
{
600+
auto vmem = vmovl_u8(vld1_u8((unsigned char const*)mem);
601+
return bitwise_cast<T>(0 - vmem);
602+
}
603+
576604
/*********
577605
* store *
578606
*********/

0 commit comments

Comments
 (0)