Skip to content

Commit 619d93b

Browse files
++
1 parent 13f5e77 commit 619d93b

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -730,24 +730,31 @@ namespace xsimd
730730
// Writes a batch_bool whose element type is 1 byte wide to a bool array (NEON).
//
// b   : mask batch; b.data is consumed as a uint8x16_t (16 byte lanes).
// mem : destination; vst1q_u8 writes 16 bytes starting here.
//       NOTE(review): the (uint8_t*) cast assumes sizeof(bool) == 1 - confirm.
//
// This span is a diff hunk: the statement after the "733-" marker is the
// removed version and the one after "733+" is its replacement.
//   removed: 0 - lane   -> equals 1 only when the lane is exactly 0xFF.
//   added  : lane >> 7  -> keeps only the top bit, so every lane becomes 0 or 1
//                          regardless of the other bits.
// Presumably true lanes are 0xFF and false lanes 0x00 (usual NEON mask
// convention), in which case both forms agree - TODO confirm.
template <class T, class A, detail::enable_sized_t<T, 1> = 0>
731731
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
732732
{
733-
uint8x16_t val = vsubq_u8(vdupq_n_u8(0), b.data);
733+
uint8x16_t val = vshrq_n_u8(b.data, 7);
734734
vst1q_u8((uint8_t*)mem, val);
735735
}
736736

737737
// Writes a batch_bool whose element type is 2 bytes wide to a bool array (NEON).
//
// b   : mask batch; b.data is consumed as 16-bit lanes by vqmovn_u16.
// mem : destination; vst1_u8 writes 8 bytes starting here.
//       NOTE(review): the (uint8_t*) cast assumes sizeof(bool) == 1 - confirm.
//
// vqmovn_u16 saturating-narrows each 16-bit lane to 8 bits (0xFFFF -> 0xFF,
// 0 -> 0), giving one byte per mask element.
//
// This span is a diff hunk: the statement after "740-" is the removed version
// and the one after "740+" is its replacement.
//   removed: 0 - byte   -> equals 1 only when the narrowed byte is 0xFF.
//   added  : byte >> 7  -> keeps only the top bit, so each byte becomes 0 or 1.
template <class T, class A, detail::enable_sized_t<T, 2> = 0>
738738
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
739739
{
740-
uint8x8_t val = vsub_u8(vdup_n_u8(0), vqmovn_u16(b.data));
740+
uint8x8_t val = vshr_n_u8(vqmovn_u16(b.data), 7);
741741
vst1_u8((uint8_t*)mem, val);
742742
}
743743

744744
// Writes a batch_bool whose element type is 4 bytes wide to a bool array (NEON).
//
// b   : mask batch; b.data is consumed as 32-bit lanes by vqmovn_u32.
// mem : destination; only 4 bytes are written (one 32-bit lane).
//       NOTE(review): the (uint32_t*) cast assumes sizeof(bool) == 1 and that
//       a 4-byte store through bool* is acceptable here - confirm.
//
// Narrowing chain: 32-bit lanes -> 16-bit (vqmovn_u32), padded with a zero
// upper half (vcombine_u16 + vdup_n_u16(0)) so it can be narrowed again to
// 8-bit (vqmovn_u16). The mask bytes end up in the low 4 bytes of the result;
// the remaining bytes come from the zero padding.
//
// This span is a diff hunk: the statement after "747-" is the removed version
// and the one after "747+" is its replacement.
//   removed: 0 - byte   -> equals 1 only when the narrowed byte is 0xFF.
//   added  : byte >> 7  -> keeps only the top bit, so each byte becomes 0 or 1.
//
// vst1_lane_u32(..., 0) stores just lane 0 of the vector viewed as 32-bit
// lanes, i.e. the 4 meaningful bytes.
template <class T, class A, detail::enable_sized_t<T, 4> = 0>
745745
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
746746
{
747-
uint8x8_t val = vsub_u8(vdup_n_u8(0), vqmovn_u16(vcombine_u16(vqmovn_u32(b.data), vdup_n_u16(0))));
747+
uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(b.data), vdup_n_u16(0))), 7);
748748
vst1_lane_u32((uint32_t*)mem, vreinterpret_u32_u8(val), 0);
749749
}
750750

751+
// Writes a batch_bool whose element type is 8 bytes wide to a bool array (NEON).
// Every line of this overload is marked "+" in the diff, i.e. it is newly added.
//
// b   : mask batch; b.data is consumed as 64-bit lanes by vqmovn_u64.
// mem : destination; only 2 bytes are written (one 16-bit lane).
//       NOTE(review): the (uint16_t*) cast assumes sizeof(bool) == 1 and that
//       a 2-byte store through bool* is acceptable here - confirm.
//
// Narrowing chain: 64-bit -> 32-bit (vqmovn_u64) -> 16-bit (vqmovn_u32)
// -> 8-bit (vqmovn_u16). After each of the first two steps the result is
// widened back to a full vector with a zero upper half (vcombine_* with
// vdup_n_*(0)) so the next narrowing intrinsic can take it. The mask bytes end
// up in the low 2 bytes; the rest comes from the zero padding.
//
// The final shift right by 7 keeps only the top bit of each byte, so each
// stored byte is 0 or 1. vst1_lane_u16(..., 0) stores just lane 0 of the
// vector viewed as 16-bit lanes, i.e. the 2 meaningful bytes.
template <class T, class A, detail::enable_sized_t<T, 8> = 0>
752+
XSIMD_INLINE void store(batch_bool<T, A> b, bool* mem, requires_arch<neon>) noexcept
753+
{
754+
uint8x8_t val = vshr_n_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(vcombine_u32(vqmovn_u64(b.data), vdup_n_u32(0))), vdup_n_u16(0))), 7);
755+
vst1_lane_u16((uint16_t*)mem, vreinterpret_u16_u8(val), 0);
756+
}
757+
751758
template <class A>
752759
XSIMD_INLINE void store(batch_bool<float, A> b, bool* mem, requires_arch<neon>) noexcept
753760
{

include/xsimd/arch/xsimd_neon64.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,16 @@ namespace xsimd
177177
return store_aligned<A>(dst, src, A {});
178178
}
179179

180+
/*********************
181+
* store<batch_bool> *
182+
*********************/
183+
184+
// Stores a batch_bool<double, A> to a bool array by delegating to the
// batch_bool<uint64_t, A> overload of store.
// Every line of this overload is marked "+" in the diff, i.e. it is newly added.
//
// b   : mask batch; its raw register b.data is re-wrapped as
//       batch_bool<uint64_t, A> without any conversion intrinsic.
//       Presumably b.data is already a 64-bit unsigned-lane vector - confirm.
// mem : destination bool array, forwarded unchanged.
//
// A {} is passed as the architecture tag so overload resolution picks the
// implementation for A.
// NOTE(review): the tag parameter here is requires_arch<neon> although this
// hunk belongs to xsimd_neon64.hpp - confirm this is intended rather than
// requires_arch<neon64>.
template <class A>
185+
XSIMD_INLINE void store(batch_bool<double, A> b, bool* mem, requires_arch<neon>) noexcept
186+
{
187+
store(batch_bool<uint64_t, A>(b.data), mem, A {});
188+
}
189+
180190
/****************
181191
* load_complex *
182192
****************/

0 commit comments

Comments
 (0)