@@ -991,10 +991,19 @@ namespace xsimd
991991 return dispatcher.apply (register_type (lhs), register_type (rhs));
992992 }
993993
994- template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
994+ template <class A , class T , detail::enable_sized_signed_t <T, 8 > = 0 >
995+ XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
996+ {
997+ using register_type = typename batch<T, A>::register_type;
998+ return batch_bool<T, A>(vshrq_n_s64 (vqsubq_s64 (register_type (rhs), register_type (lhs)), 63 ));
999+ }
1000+
1001+ template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
9951002 XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
9961003 {
997- return batch_bool<T, A>({ lhs.get (0 ) < rhs.get (0 ), lhs.get (1 ) < rhs.get (1 ) });
1004+ using register_type = typename batch<T, A>::register_type;
1005+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1006+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (lhs), register_type (rhs)), acc)), 63 ))));
9981007 }
9991008
10001009 /* *****
@@ -1035,6 +1044,23 @@ namespace xsimd
10351044 {
10361045 return vreinterpretq_u64_u32 (vmvnq_u32 (vreinterpretq_u32_u64 (arg)));
10371046 }
1047+
1048+ XSIMD_INLINE float32x4_t bitwise_not_f32 (float32x4_t arg) noexcept
1049+ {
1050+ return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (arg)));
1051+ }
1052+
1053+ template <class V >
1054+ XSIMD_INLINE V bitwise_not_neon (V const & arg) noexcept
1055+ {
1056+ const neon_dispatcher::unary dispatcher = {
1057+ std::make_tuple (wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1058+ wrap::vmvnq_u32, wrap::vmvnq_s32,
1059+ bitwise_not_u64, bitwise_not_s64,
1060+ bitwise_not_f32)
1061+ };
1062+ return dispatcher.apply (arg);
1063+ }
10381064 }
10391065
10401066 WRAP_BINARY_INT_EXCLUDING_64 (vcgtq, detail::comp_return_type)
@@ -1055,14 +1081,15 @@ namespace xsimd
10551081 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10561082 {
10571083 using register_type = typename batch<T, A>::register_type;
1058- return vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 );
1084+ return batch_bool<T, A>( vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 ) );
10591085 }
10601086
10611087 template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
10621088 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10631089 {
10641090 using register_type = typename batch<T, A>::register_type;
1065- return detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs))), 63 ));
1091+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1092+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs)), acc)), 63 ))));
10661093 }
10671094
10681095 /* *****
@@ -1086,7 +1113,7 @@ namespace xsimd
10861113 template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
10871114 XSIMD_INLINE batch_bool<T, A> ge (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10881115 {
1089- return batch_bool<T, A>({ lhs. get ( 0 ) >= rhs. get ( 0 ), lhs. get ( 1 ) >= rhs. get ( 1 ) } );
1116+ return detail::bitwise_not_neon ( lt ( rhs, lt, A {}) );
10901117 }
10911118
10921119 /* ******************
@@ -1236,26 +1263,6 @@ namespace xsimd
12361263
12371264 WRAP_UNARY_INT_EXCLUDING_64 (vmvnq)
12381265
// NOTE(review): every line of this `detail` block carries the diff's `-` marker, i.e. this is the
// old copy of bitwise_not_f32 / bitwise_not_neon that the change deletes. The same two helpers
// appear as added (`+`) lines earlier in this diff, ahead of the comparison kernels.
1239- namespace detail
1240- {
1241- XSIMD_INLINE float32x4_t bitwise_not_f32 (float32x4_t arg) noexcept
1242- {
1243- return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (arg)));
1244- }
1245-
1246- template <class V >
1247- XSIMD_INLINE V bitwise_not_neon (V const & arg) noexcept
1248- {
1249- const neon_dispatcher::unary dispatcher = {
1250- std::make_tuple (wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1251- wrap::vmvnq_u32, wrap::vmvnq_s32,
1252- bitwise_not_u64, bitwise_not_s64,
1253- bitwise_not_f32)
1254- };
1255- return dispatcher.apply (arg);
1256- }
1257- }
1258-
12591266 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
12601267 XSIMD_INLINE batch<T, A> bitwise_not (batch<T, A> const & arg, requires_arch<neon>) noexcept
12611268 {
0 commit comments