Skip to content

Commit ef28d70

Browse files
committed
Try out ARM patch
1 parent: fc292ff; commit: ef28d70

File tree

1 file changed: 38 additions (+38) and 4 deletions (-4)

1 file changed: 38 additions (+38) and 4 deletions (-4)

include/xsimd/types/xsimd_neon_bool.hpp

Lines changed: 38 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -386,14 +386,26 @@ namespace xsimd
386386

387387
static bool all(const batch_type& rhs)
388388
{
389+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
390+
return vminvq_u16(rhs) != 0;
391+
#else
389392
uint16x4_t tmp = vand_u16(vget_low_u16(rhs), vget_high_u16(rhs));
390-
return vget_lane_u16(vpmin_u16(tmp, tmp), 0) != 0;
393+
tmp = vpmin_u16(tmp, tmp);
394+
tmp = vpmin_u16(tmp, tmp);
395+
return vget_lane_u16(tmp, 0) != 0;
396+
#endif
391397
}
392398

393399
static bool any(const batch_type& rhs)
394400
{
401+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
402+
return vmaxvq_u16(rhs) != 0;
403+
#else
395404
uint16x4_t tmp = vorr_u16(vget_low_u16(rhs), vget_high_u16(rhs));
396-
return vget_lane_u16(vpmax_u16(tmp, tmp), 0);
405+
tmp = vpmax_u16(tmp, tmp);
406+
tmp = vpmax_u16(tmp, tmp);
407+
return vget_lane_u16(tmp, 0);
408+
#endif
397409
}
398410
};
399411
}
@@ -517,14 +529,22 @@ namespace xsimd
517529

518530
static bool all(const batch_type& rhs)
519531
{
532+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
533+
return vminvq_u32(rhs) != 0;
534+
#else
520535
uint32x2_t tmp = vand_u32(vget_low_u32(rhs), vget_high_u32(rhs));
521536
return vget_lane_u32(vpmin_u32(tmp, tmp), 0) != 0;
537+
#endif
522538
}
523539

524540
static bool any(const batch_type& rhs)
525541
{
542+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
543+
return vmaxvq_u32(rhs) != 0;
544+
#else
526545
uint32x2_t tmp = vorr_u32(vget_low_u32(rhs), vget_high_u32(rhs));
527546
return vget_lane_u32(vpmax_u32(tmp, tmp), 0);
547+
#endif
528548
}
529549
};
530550
}
@@ -678,14 +698,28 @@ namespace xsimd
678698

679699
static bool all(const batch_type& rhs)
680700
{
701+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
702+
return vminvq_u8(rhs) != 0;
703+
#else
681704
uint8x8_t tmp = vand_u8(vget_low_u8(rhs), vget_high_u8(rhs));
682-
return vget_lane_u8(vpmin_u8(tmp, tmp), 0) != 0;
705+
tmp = vpmin_u8(tmp, tmp);
706+
tmp = vpmin_u8(tmp, tmp);
707+
tmp = vpmin_u8(tmp, tmp);
708+
return vget_lane_u8(tmp, 0) != 0;
709+
#endif
683710
}
684711

685712
static bool any(const batch_type& rhs)
686713
{
714+
#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
715+
return vmaxvq_u8(rhs) != 0;
716+
#else
687717
uint8x8_t tmp = vorr_u8(vget_low_u8(rhs), vget_high_u8(rhs));
688-
return vget_lane_u8(vpmax_u8(tmp, tmp), 0);
718+
tmp = vpmax_u8(tmp, tmp);
719+
tmp = vpmax_u8(tmp, tmp);
720+
tmp = vpmax_u8(tmp, tmp);
721+
return vget_lane_u8(tmp, 0);
722+
#endif
689723
}
690724
};
691725
}

0 commit comments

Comments (0)