Skip to content

Commit 371ab6b

Browse files
committed
Fix build when if constexpr is unavailable
1 parent 7aa9df4 commit 371ab6b

File tree

1 file changed

+55
-49
lines changed

1 file changed

+55
-49
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 55 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3281,70 +3281,76 @@ namespace xsimd
32813281
/********
32823282
* mask *
32833283
********/
3284-
template <class A, class T>
3285-
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3284+
// Compile-time flag read by the neon mask() overloads via detail::do_swap.
// It is false when XSIMD_LITTLE_ENDIAN is defined and true otherwise; the
// overloads use it to decide whether to apply their vrev* reordering steps.
namespace detail
32863285
{
32873286
#ifdef XSIMD_LITTLE_ENDIAN
32883287
static constexpr bool do_swap = false;
32893288
#else
32903289
static constexpr bool do_swap = true;
32913290
#endif
3291+
}
3292+
3293+
// mask() overload selected through detail::enable_sized_t<T, 1>
// (presumably 1-byte element types -- confirm against enable_sized_t).
// Packs the 16 byte lanes of `self` into the low 16 bits of a uint64_t.
// NOTE(review): the bit folding assumes every lane of `self` is either
// all-ones or all-zeros -- confirm against batch_bool's invariant.
template <class A, class T, detail::enable_sized_t<T, 1> = 0>
3294+
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3295+
{
3296+
uint8x16_t inner = self;
3297+
XSIMD_IF_CONSTEXPR(detail::do_swap)
3298+
{
3299+
inner = vrev16q_u8(inner);
3300+
}
32923301

3293-
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
// View the bytes as 16-bit pairs, then shift each pair right by 4 and narrow
// to 8 bits: every source byte is left contributing one nibble, so the whole
// 128-bit register fits in 64 bits.
3302+
uint16x8_t pairs = vreinterpretq_u16_u8(inner);
3303+
uint8x8_t narrowed = vshrn_n_u16(pairs, 4);
3304+
XSIMD_IF_CONSTEXPR(detail::do_swap)
32943305
{
3295-
uint8x16_t inner = self;
3296-
XSIMD_IF_CONSTEXPR(do_swap)
3297-
{
3298-
inner = vrev16q_u8(inner);
3299-
}
3306+
narrowed = vrev64_u8(narrowed);
3307+
}
33003308

3301-
uint16x8_t pairs = vreinterpretq_u16_u8(inner);
3302-
uint8x8_t narrowed = vshrn_n_u16(pairs, 4);
3303-
XSIMD_IF_CONSTEXPR(do_swap)
3304-
{
3305-
narrowed = vrev64_u8(narrowed);
3306-
}
// Keep one bit per nibble (bits 0, 4, 8, ...), then fold neighbouring groups
// together with shift/or/and steps until the 16 bits are contiguous.
3309+
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3310+
mask &= 0x1111111111111111;
3311+
mask = mask | mask >> 3;
3312+
mask = (mask | mask >> 6) & 0x000F000F000F000F;
3313+
mask = (mask | mask >> 12) & 0x000000FF000000FF;
3314+
return (mask | mask >> 24) & 0xFFFF;
3315+
}
33073316

3308-
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3309-
mask &= 0x1111111111111111;
3310-
mask = mask | mask >> 3;
3311-
mask = (mask | mask >> 6) & 0x000F000F000F000F;
3312-
mask = (mask | mask >> 12) & 0x000000FF000000FF;
3313-
return (mask | mask >> 24) & 0xFFFF;
3314-
}
3315-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
3317+
// mask() overload selected through detail::enable_sized_t<T, 2>
// (presumably 2-byte element types -- confirm against enable_sized_t).
// Narrows the eight 16-bit lanes of `self` to bytes with vmovn_u16 and packs
// one bit per lane into the low 8 bits of the returned uint64_t.
// NOTE(review): assumes every lane of `self` is all-ones or all-zeros -- confirm.
template <class A, class T, detail::enable_sized_t<T, 2> = 0>
3318+
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3319+
{
3320+
uint8x8_t narrowed = vmovn_u16(self);
3321+
XSIMD_IF_CONSTEXPR(detail::do_swap)
33163322
{
3317-
uint8x8_t narrowed = vmovn_u16(self);
3318-
XSIMD_IF_CONSTEXPR(do_swap)
3319-
{
3320-
narrowed = vrev64_u8(narrowed);
3321-
}
3322-
3323-
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3324-
mask &= 0x0101010101010101;
3325-
mask = mask | mask >> 7;
3326-
mask = mask | mask >> 14;
3327-
return (mask | mask >> 28) & 0xFF;
3323+
narrowed = vrev64_u8(narrowed);
33283324
}
3329-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
3330-
{
3331-
uint16x4_t narrowed = vmovn_u32(self);
3332-
XSIMD_IF_CONSTEXPR(do_swap)
3333-
{
3334-
narrowed = vrev64_u16(narrowed);
3335-
}
33363325

3337-
uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(narrowed), 0);
3338-
mask &= 0x0001000100010001;
3339-
mask = mask | mask >> 15;
3340-
return (mask | mask >> 30) & 0xF;
3341-
}
3342-
else
// Keep the lowest bit of each byte (bits 0, 8, 16, ...), then fold with
// shifts of 7, 14 and 28 so the eight lane bits end up in bits 0..7.
3326+
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
3327+
mask &= 0x0101010101010101;
3328+
mask = mask | mask >> 7;
3329+
mask = mask | mask >> 14;
3330+
return (mask | mask >> 28) & 0xFF;
3331+
}
3332+
3333+
// mask() overload selected through detail::enable_sized_t<T, 4>
// (presumably 4-byte element types -- confirm against enable_sized_t).
// Narrows the four 32-bit lanes of `self` to 16 bits with vmovn_u32 and packs
// one bit per lane into the low 4 bits of the returned uint64_t.
// NOTE(review): assumes every lane of `self` is all-ones or all-zeros -- confirm.
template <class A, class T, detail::enable_sized_t<T, 4> = 0>
3334+
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3335+
{
3336+
uint16x4_t narrowed = vmovn_u32(self);
3337+
XSIMD_IF_CONSTEXPR(detail::do_swap)
33433338
{
3344-
uint64_t mask_lo = vgetq_lane_u64(self, 0);
3345-
uint64_t mask_hi = vgetq_lane_u64(self, 1);
3346-
return ((mask_lo >> 63) | (mask_hi << 1)) & 0x3;
3339+
narrowed = vrev64_u16(narrowed);
33473340
}
3341+
// Keep the lowest bit of each 16-bit element (bits 0, 16, 32, 48), then fold
// with shifts of 15 and 30 so the four lane bits end up in bits 0..3.
3342+
uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(narrowed), 0);
3343+
mask &= 0x0001000100010001;
3344+
mask = mask | mask >> 15;
3345+
return (mask | mask >> 30) & 0xF;
3346+
}
3347+
3348+
// mask() overload selected through detail::enable_sized_t<T, 8>
// (presumably 8-byte element types -- confirm against enable_sized_t).
// Reads the two 64-bit lanes of `self` and returns a 2-bit value: bit 0 is
// the top bit (bit 63) of lane 0, bit 1 is the lowest bit of lane 1.
// Unlike the overloads for smaller element sizes, this one does not consult
// detail::do_swap.
template <class A, class T, detail::enable_sized_t<T, 8> = 0>
3349+
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<neon>) noexcept
3350+
{
3351+
uint64_t mask_lo = vgetq_lane_u64(self, 0);
3352+
uint64_t mask_hi = vgetq_lane_u64(self, 1);
3353+
// lane 0 contributes its sign bit as bit 0; lane 1 shifted left by one
// contributes its bit 0 as bit 1; everything above bit 1 is masked off.
return ((mask_lo >> 63) | (mask_hi << 1)) & 0x3;
33483354
}
33493355
}
33503356

0 commit comments

Comments
 (0)