@@ -973,6 +973,42 @@ namespace xsimd
973973
974974 }
975975
        /*
         * bitwise not generic utility
         *
         * Placed ahead of the comparison operators so that the 64-bit
         * comparison fallbacks below can negate their masks with it.
         */

        // NEON has no vmvnq for 64-bit lanes, so the macro only wraps the
        // 8/16/32-bit integer variants.
        WRAP_UNARY_INT_EXCLUDING_64(vmvnq)

        namespace detail
        {
            // Bitwise NOT of a signed 64-bit vector: reinterpret as 32-bit
            // lanes, invert with vmvnq_s32, and reinterpret back (lane width
            // is irrelevant for a pure bit inversion).
            XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
            {
                return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg)));
            }

            // Same trick for unsigned 64-bit lanes.
            XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept
            {
                return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
            }

            // Bitwise NOT of a float vector, performed on the raw bit pattern
            // via an unsigned reinterpret round-trip.
            XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept
            {
                return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(arg)));
            }

            // Dispatch a bitwise NOT over any NEON register type V.
            // NOTE: the tuple order must match neon_dispatcher::unary's
            // expected (u8, s8, u16, s16, u32, s32, u64, s64, f32) layout.
            template <class V>
            XSIMD_INLINE V bitwise_not_neon(V const& arg) noexcept
            {
                const neon_dispatcher::unary dispatcher = {
                    std::make_tuple(wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
                                    wrap::vmvnq_u32, wrap::vmvnq_s32,
                                    bitwise_not_u64, bitwise_not_s64,
                                    bitwise_not_f32)
                };
                return dispatcher.apply(arg);
            }
        }
9761012 /* *****
9771013 * lt *
9781014 ******/
@@ -991,10 +1027,19 @@ namespace xsimd
9911027 return dispatcher.apply (register_type (lhs), register_type (rhs));
9921028 }
9931029
994- template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
1030+ template <class A , class T , detail::enable_sized_signed_t <T, 8 > = 0 >
1031+ XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
1032+ {
1033+ using register_type = typename batch<T, A>::register_type;
1034+ return batch_bool<T, A>(vshrq_n_s64 (vqsubq_s64 (register_type (rhs), register_type (lhs)), 63 ));
1035+ }
1036+
1037+ template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
9951038 XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
9961039 {
997- return batch_bool<T, A>({ lhs.get (0 ) < rhs.get (0 ), lhs.get (1 ) < rhs.get (1 ) });
1040+ using register_type = typename batch<T, A>::register_type;
1041+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1042+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (lhs), register_type (rhs)), acc)), 63 ))));
9981043 }
9991044
10001045 /* *****
@@ -1024,18 +1069,6 @@ namespace xsimd
10241069 /* *****
10251070 * gt *
10261071 ******/
1027- namespace detail
1028- {
1029- XSIMD_INLINE int64x2_t bitwise_not_s64 (int64x2_t arg) noexcept
1030- {
1031- return vreinterpretq_s64_s32 (vmvnq_s32 (vreinterpretq_s32_s64 (arg)));
1032- }
1033-
1034- XSIMD_INLINE uint64x2_t bitwise_not_u64 (uint64x2_t arg) noexcept
1035- {
1036- return vreinterpretq_u64_u32 (vmvnq_u32 (vreinterpretq_u32_u64 (arg)));
1037- }
1038- }
10391072
10401073 WRAP_BINARY_INT_EXCLUDING_64 (vcgtq, detail::comp_return_type)
10411074 WRAP_BINARY_FLOAT (vcgtq, detail::comp_return_type)
@@ -1055,14 +1088,15 @@ namespace xsimd
10551088 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10561089 {
10571090 using register_type = typename batch<T, A>::register_type;
1058- return vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 );
1091+ return batch_bool<T, A>( vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 ) );
10591092 }
10601093
10611094 template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
10621095 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10631096 {
10641097 using register_type = typename batch<T, A>::register_type;
1065- return detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs))), 63 ));
1098+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1099+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs)), acc)), 63 ))));
10661100 }
10671101
10681102 /* *****
@@ -1086,7 +1120,7 @@ namespace xsimd
10861120 template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
10871121 XSIMD_INLINE batch_bool<T, A> ge (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10881122 {
1089- return batch_bool<T, A>({ lhs. get ( 0 ) >= rhs. get ( 0 ) , lhs. get ( 1 ) >= rhs. get ( 1 ) } );
1123+ return detail::bitwise_not_neon ( lt ( rhs, lhs, A {}) );
10901124 }
10911125
10921126 /* ******************
@@ -1234,28 +1268,6 @@ namespace xsimd
12341268 * bitwise_not *
12351269 ***************/
12361270
1237- WRAP_UNARY_INT_EXCLUDING_64 (vmvnq)
1238-
1239- namespace detail
1240- {
1241- XSIMD_INLINE float32x4_t bitwise_not_f32 (float32x4_t arg) noexcept
1242- {
1243- return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (arg)));
1244- }
1245-
1246- template <class V >
1247- XSIMD_INLINE V bitwise_not_neon (V const & arg) noexcept
1248- {
1249- const neon_dispatcher::unary dispatcher = {
1250- std::make_tuple (wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1251- wrap::vmvnq_u32, wrap::vmvnq_s32,
1252- bitwise_not_u64, bitwise_not_s64,
1253- bitwise_not_f32)
1254- };
1255- return dispatcher.apply (arg);
1256- }
1257- }
1258-
12591271 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
12601272 XSIMD_INLINE batch<T, A> bitwise_not (batch<T, A> const & arg, requires_arch<neon>) noexcept
12611273 {
0 commit comments