Skip to content

Commit f684bdd

Browse files
Optimize loading of batch_bool from memory on arm
Use the same approach as #1172
1 parent 005f629 commit f684bdd

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2853,6 +2853,34 @@ namespace xsimd
28532853
return caster.apply<dst_register_type>(src_register_type(arg));
28542854
}
28552855

2856+
/*************
2857+
* load bool *
2858+
*************/
2859+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
2860+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
2861+
{
2862+
auto vmem = load_unaligned<A>((unsigned char const*)mem, convert<unsigned char> {}, A {});
2863+
return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
2864+
}
2865+
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
2866+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
2867+
{
2868+
return load_unaligned(mem, t, r);
2869+
}
2870+
2871+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
2872+
XSIMD_INLINE batch_bool<T, A> load_unaligned(bool const* mem, batch_bool<T, A>, requires_arch<neon>) noexcept
2873+
{
2874+
auto vmem = vmovl_u8(vld1_u8((unsigned char const*)mem));
2875+
return batch_bool<T, A>(bitwise_cast<typename std::make_unsigned<T>::type>(0 - vmem).data);
2876+
}
2877+
2878+
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
2879+
XSIMD_INLINE batch_bool<T, A> load_aligned(bool const* mem, batch_bool<T, A> t, requires_arch<neon> r) noexcept
2880+
{
2881+
return load_unaligned(mem, t, r);
2882+
}
2883+
28562884
/*********
28572885
* isnan *
28582886
*********/

0 commit comments

Comments
 (0)