Skip to content

Commit 626ca89

Browse files
WIP
1 parent fdcc012 commit 626ca89

File tree

1 file changed

+34
-27
lines changed

1 file changed

+34
-27
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -991,10 +991,19 @@ namespace xsimd
991991
return dispatcher.apply(register_type(lhs), register_type(rhs));
992992
}
993993

994-
template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
994+
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
995995
XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
996996
{
997-
return batch_bool<T, A>({ lhs.get(0) < rhs.get(0), lhs.get(1) < rhs.get(1) });
997+
using register_type = typename batch<T, A>::register_type;
998+
return batch_bool<T, A>(vshrq_n_s64(vqsubq_s64(register_type(rhs), register_type(lhs)), 63));
999+
}
1000+
1001+
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
1002+
XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
1003+
{
1004+
using register_type = typename batch<T, A>::register_type;
1005+
register_type acc = { 0x8FFFFFFFFFFFFFFFull, 0x8FFFFFFFFFFFFFFFull };
1006+
return batch_bool<T, A>(vreinterpretq_u64_s64(detail::bitwise_not_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqaddq_u64(vqsubq_u64(register_type(lhs), register_type(rhs)), acc)), 63))));
9981007
}
9991008

10001009
/******
@@ -1024,6 +1033,8 @@ namespace xsimd
10241033
/******
10251034
* gt *
10261035
******/
1036+
WRAP_UNARY_INT_EXCLUDING_64(vmvnq)
1037+
10271038
namespace detail
10281039
{
10291040
XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
@@ -1035,6 +1046,23 @@ namespace xsimd
10351046
{
10361047
return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
10371048
}
1049+
1050+
// Bitwise complement of a float32x4_t: the register is reinterpreted as
// 32-bit unsigned lanes, complemented with vmvnq_u32, and reinterpreted
// back to float lanes.
XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept
{
    const uint32x4_t raw_bits = vreinterpretq_u32_f32(arg);
    const uint32x4_t flipped_bits = vmvnq_u32(raw_bits);
    return vreinterpretq_f32_u32(flipped_bits);
}
1054+
1055+
template <class V>
1056+
XSIMD_INLINE V bitwise_not_neon(V const& arg) noexcept
1057+
{
1058+
const neon_dispatcher::unary dispatcher = {
1059+
std::make_tuple(wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1060+
wrap::vmvnq_u32, wrap::vmvnq_s32,
1061+
bitwise_not_u64, bitwise_not_s64,
1062+
bitwise_not_f32)
1063+
};
1064+
return dispatcher.apply(arg);
1065+
}
10381066
}
10391067

10401068
WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
@@ -1055,14 +1083,15 @@ namespace xsimd
10551083
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    // Lane-wise `lhs > rhs` on signed 64-bit lanes (the body uses the s64
    // intrinsics). The saturating difference rhs - lhs keeps the sign of the
    // exact difference, so its sign bit is set exactly when `rhs < lhs`,
    // i.e. `lhs > rhs`; the arithmetic shift by 63 spreads that bit over the
    // whole lane to form the mask.
    using register_type = typename batch<T, A>::register_type;
    // Operand order matters: (rhs, lhs) yields "greater than"; (lhs, rhs)
    // would yield "less than".
    const int64x2_t diff = vqsubq_s64(register_type(rhs), register_type(lhs));
    // The shift yields a signed vector; the mask is handed to batch_bool
    // as an unsigned 64-bit vector.
    return batch_bool<T, A>(vreinterpretq_u64_s64(vshrq_n_s64(diff, 63)));
}
10601088

10611089
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
10621090
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
10631091
{
10641092
using register_type = typename batch<T, A>::register_type;
1065-
return detail::bitwise_not_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqsubq_u64(register_type(rhs), register_type(lhs))), 63));
1093+
register_type acc = { 0x8FFFFFFFFFFFFFFFull, 0x8FFFFFFFFFFFFFFFull };
1094+
return batch_bool<T, A>(vreinterpretq_u64_s64(detail::bitwise_not_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqaddq_u64(vqsubq_u64(register_type(rhs), register_type(lhs)), acc)), 63))));
10661095
}
10671096

10681097
/******
@@ -1086,7 +1115,7 @@ namespace xsimd
10861115
template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
10871116
XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
10881117
{
1089-
return batch_bool<T, A>({ lhs.get(0) >= rhs.get(0), lhs.get(1) >= rhs.get(1) });
1118+
return detail::bitwise_not_neon(lt(rhs, lt, A {}));
10901119
}
10911120

10921121
/*******************
@@ -1234,28 +1263,6 @@ namespace xsimd
12341263
* bitwise_not *
12351264
***************/
12361265

1237-
WRAP_UNARY_INT_EXCLUDING_64(vmvnq)
1238-
1239-
namespace detail
1240-
{
1241-
XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept
1242-
{
1243-
return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(arg)));
1244-
}
1245-
1246-
template <class V>
1247-
XSIMD_INLINE V bitwise_not_neon(V const& arg) noexcept
1248-
{
1249-
const neon_dispatcher::unary dispatcher = {
1250-
std::make_tuple(wrap::vmvnq_u8, wrap::vmvnq_s8, wrap::vmvnq_u16, wrap::vmvnq_s16,
1251-
wrap::vmvnq_u32, wrap::vmvnq_s32,
1252-
bitwise_not_u64, bitwise_not_s64,
1253-
bitwise_not_f32)
1254-
};
1255-
return dispatcher.apply(arg);
1256-
}
1257-
}
1258-
12591266
template <class A, class T, detail::enable_neon_type_t<T> = 0>
12601267
XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& arg, requires_arch<neon>) noexcept
12611268
{

0 commit comments

Comments
 (0)