@@ -991,10 +991,19 @@ namespace xsimd
991991 return dispatcher.apply (register_type (lhs), register_type (rhs));
992992 }
993993
994- template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
994+ template <class A , class T , detail::enable_sized_signed_t <T, 8 > = 0 >
995995 XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
996996 {
997- return batch_bool<T, A>({ lhs.get (0 ) < rhs.get (0 ), lhs.get (1 ) < rhs.get (1 ) });
997+ using register_type = typename batch<T, A>::register_type;
998+ return batch_bool<T, A>(vshrq_n_s64 (vqsubq_s64 (register_type (rhs), register_type (lhs)), 63 ));
999+ }
1000+
1001+ template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
1002+ XSIMD_INLINE batch_bool<T, A> lt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
1003+ {
1004+ using register_type = typename batch<T, A>::register_type;
1005+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1006+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (lhs), register_type (rhs)), acc)), 63 ))));
9981007 }
9991008
10001009 /* *****
@@ -1024,6 +1033,8 @@ namespace xsimd
10241033 /* *****
10251034 * gt *
10261035 ******/
1036+ WRAP_UNARY_INT_EXCLUDING_64 (vmvnq)
1037+
10271038 namespace detail
10281039 {
10291040 XSIMD_INLINE int64x2_t bitwise_not_s64 (int64x2_t arg) noexcept
@@ -1035,6 +1046,23 @@ namespace xsimd
10351046 {
10361047 return vreinterpretq_u64_u32 (vmvnq_u32 (vreinterpretq_u32_u64 (arg)));
10371048 }
1049+
1050+ XSIMD_INLINE float32x4_t bitwise_not_f32 (float32x4_t arg) noexcept
1051+ {
1052+ return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (arg)));
1053+ }
1054+
1055+ template <class V >
1056+ XSIMD_INLINE V bitwise_not_neon (V const & arg) noexcept
1057+ {
1058+ const neon_dispatcher::unary dispatcher = {
1059+ std::make_tuple (wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1060+ wrap::vmvnq_u32, wrap::vmvnq_s32,
1061+ bitwise_not_u64, bitwise_not_s64,
1062+ bitwise_not_f32)
1063+ };
1064+ return dispatcher.apply (arg);
1065+ }
10381066 }
10391067
10401068 WRAP_BINARY_INT_EXCLUDING_64 (vcgtq, detail::comp_return_type)
@@ -1055,14 +1083,15 @@ namespace xsimd
10551083 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10561084 {
10571085 using register_type = typename batch<T, A>::register_type;
1058- return vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 );
1086+ return batch_bool<T, A>( vshrq_n_s64 (vqsubq_s64 (register_type (lhs), register_type (rhs)), 63 ) );
10591087 }
10601088
10611089 template <class A , class T , detail::enable_sized_unsigned_t <T, 8 > = 0 >
10621090 XSIMD_INLINE batch_bool<T, A> gt (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10631091 {
10641092 using register_type = typename batch<T, A>::register_type;
1065- return detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs))), 63 ));
1093+ register_type acc = { 0x8FFFFFFFFFFFFFFFull , 0x8FFFFFFFFFFFFFFFull };
1094+ return batch_bool<T, A>(vreinterpretq_u64_s64 (detail::bitwise_not_s64 (vshrq_n_s64 (vreinterpretq_s64_u64 (vqaddq_u64 (vqsubq_u64 (register_type (rhs), register_type (lhs)), acc)), 63 ))));
10661095 }
10671096
10681097 /* *****
@@ -1086,7 +1115,7 @@ namespace xsimd
10861115 template <class A , class T , detail::enable_sized_integral_t <T, 8 > = 0 >
10871116 XSIMD_INLINE batch_bool<T, A> ge (batch<T, A> const & lhs, batch<T, A> const & rhs, requires_arch<neon>) noexcept
10881117 {
1089- return batch_bool<T, A>({ lhs. get ( 0 ) >= rhs. get ( 0 ), lhs. get ( 1 ) >= rhs. get ( 1 ) } );
1118+ return detail::bitwise_not_neon ( lt ( rhs, lt, A {}) );
10901119 }
10911120
10921121 /* ******************
@@ -1234,28 +1263,6 @@ namespace xsimd
12341263 * bitwise_not *
12351264 ***************/
12361265
1237- WRAP_UNARY_INT_EXCLUDING_64 (vmvnq)
1238-
1239- namespace detail
1240- {
1241- XSIMD_INLINE float32x4_t bitwise_not_f32 (float32x4_t arg) noexcept
1242- {
1243- return vreinterpretq_f32_u32 (vmvnq_u32 (vreinterpretq_u32_f32 (arg)));
1244- }
1245-
1246- template <class V >
1247- XSIMD_INLINE V bitwise_not_neon (V const & arg) noexcept
1248- {
1249- const neon_dispatcher::unary dispatcher = {
1250- std::make_tuple (wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1251- wrap::vmvnq_u32, wrap::vmvnq_s32,
1252- bitwise_not_u64, bitwise_not_s64,
1253- bitwise_not_f32)
1254- };
1255- return dispatcher.apply (arg);
1256- }
1257- }
1258-
12591266 template <class A , class T , detail::enable_neon_type_t <T> = 0 >
12601267 XSIMD_INLINE batch<T, A> bitwise_not (batch<T, A> const & arg, requires_arch<neon>) noexcept
12611268 {
0 commit comments