Skip to content

Commit 34218a1

Browse files
WIP
1 parent 540e1b2 commit 34218a1

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

include/xsimd/arch/xsimd_neon.hpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,18 @@ namespace xsimd
10241024
/******
10251025
* gt *
10261026
******/
1027+
namespace detail
1028+
{
1029+
XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
1030+
{
1031+
return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg)));
1032+
}
1033+
1034+
XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept
1035+
{
1036+
return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
1037+
}
1038+
}
10271039

10281040
WRAP_BINARY_INT_EXCLUDING_64(vcgtq, detail::comp_return_type)
10291041
WRAP_BINARY_FLOAT(vcgtq, detail::comp_return_type)
@@ -1039,10 +1051,17 @@ namespace xsimd
10391051
return dispatcher.apply(register_type(lhs), register_type(rhs));
10401052
}
10411053

1042-
template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
1054+
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
1055+
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
1056+
{
1057+
using register_type = typename batch<T, A>::register_type;
1058+
return vshrq_n_s64(vqsubq_s64(register_type(lhs), register_type(rhs)), 63);
1059+
}
1060+
1061+
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
10431062
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
10441063
{
1045-
return batch_bool<T, A>({ lhs.get(0) > rhs.get(0), lhs.get(1) > rhs.get(1) });
1064+
return detail::bitwise_not_s64(vshrq_n_s64(vreinterpretq_s64_u64(vqsubq_u64(register_type(rhs), register_type(lhs))), 63));
10461065
}
10471066

10481067
/******
@@ -1218,16 +1237,6 @@ namespace xsimd
12181237

12191238
namespace detail
12201239
{
1221-
XSIMD_INLINE int64x2_t bitwise_not_s64(int64x2_t arg) noexcept
1222-
{
1223-
return vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(arg)));
1224-
}
1225-
1226-
XSIMD_INLINE uint64x2_t bitwise_not_u64(uint64x2_t arg) noexcept
1227-
{
1228-
return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(arg)));
1229-
}
1230-
12311240
XSIMD_INLINE float32x4_t bitwise_not_f32(float32x4_t arg) noexcept
12321241
{
12331242
return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(arg)));

0 commit comments

Comments
 (0)