Skip to content

Commit fca912a

Browse files
Optimize loading of batch_bool from memory on arm
Use the same approach as #1172
1 parent 005f629 commit fca912a

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,31 @@ namespace xsimd
573573
return vld1q_f32(src);
574574
}
575575

576+
/* batch bool version */
577+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
578+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
579+
{
580+
auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, T {});
581+
return bitwise_cast<T>(0 - vmem);
582+
}
583+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
584+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
585+
{
586+
return load_unaligned(mem, t, r);
587+
}
588+
589+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
590+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
591+
{
592+
auto vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
593+
return bitwise_cast<T>(0 - vmem);
594+
}
595+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
596+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
597+
{
598+
return load_unaligned(mem, t, r);
599+
}
600+
576601
/*********
577602
* store *
578603
*********/

0 commit comments

Comments
 (0)