diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index 1c5a1108ff9e0..4c8f34a435bdf 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -17,7 +17,7 @@ #include "src/__support/macros/properties/architectures.h" #include "src/errno/libc_errno.h" -#if defined(LIBC_TARGET_ARCH_IS_AARCH64) +#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP) #if defined(__APPLE__) #include "aarch64/fenv_darwin_impl.h" #else diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h index 5d0deddd751f5..768f13414bd95 100644 --- a/libc/src/__support/FPUtil/nearest_integer.h +++ b/libc/src/__support/FPUtil/nearest_integer.h @@ -16,7 +16,7 @@ #if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2)) #include "x86_64/nearest_integer.h" -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) +#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)) #include "aarch64/nearest_integer.h" #elif defined(LIBC_TARGET_ARCH_IS_GPU) diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h index 89da44ff2970f..1d377ab9a4e2d 100644 --- a/libc/src/__support/FPUtil/sqrt.h +++ b/libc/src/__support/FPUtil/sqrt.h @@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt(double x) { // Use inline assembly when __builtin_elementwise_sqrt is not available. #if defined(LIBC_TARGET_CPU_HAS_SSE2) #include "x86_64/sqrt.h" -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) +#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP) #include "aarch64/sqrt.h" #elif defined(LIBC_TARGET_ARCH_IS_ARM) #include "arm/sqrt.h" diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h index e41ac202dbaac..66d24378095b9 100644 --- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h +++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h @@ -19,9 +19,43 @@ namespace LIBC_NAMESPACE_DECL { -[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1, - CPtr p2, - size_t count) { +[[maybe_unused]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) { + if (LIBC_LIKELY(count < 16)) { + switch (count) { + case 0: + return BcmpReturnType::zero(); + case 1: + return generic::Bcmp::block(p1, p2); + case 2: + return generic::Bcmp::block(p1, p2); + case 3: + return generic::Bcmp::head_tail(p1, p2, count); + case 4: + return generic::Bcmp::block(p1, p2); + case 5: + case 6: + case 7: + return generic::Bcmp::head_tail(p1, p2, count); + case 8: + return generic::Bcmp::block(p1, p2); + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + return generic::Bcmp::head_tail(p1, p2, count); + } + } + + return generic::Bcmp::loop_and_tail_align_above(256, p1, p2, count); +} + +#ifdef __ARM_NEON +[[maybe_unused]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) { if (LIBC_LIKELY(count <= 32)) { if (LIBC_UNLIKELY(count >= 16)) { return aarch64::Bcmp<16>::head_tail(p1, p2, count); @@ -65,6 +99,16 @@ namespace LIBC_NAMESPACE_DECL { } return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count); } +#endif + +[[gnu::flatten]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) { +#if defined(__ARM_NEON) + return inline_bcmp_aarch64_with_fp(p1, p2, count); +#else + return inline_bcmp_aarch64_no_fp(p1, p2, count); +#endif +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h index 35ca077dab526..380ebb410efb5 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h +++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h @@ -17,17 +17,40 @@ namespace LIBC_NAMESPACE_DECL { [[maybe_unused]] LIBC_INLINE MemcmpReturnType -inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) { - if (LIBC_UNLIKELY(count >= 384)) { - if (auto value = generic::Memcmp::block(p1, p2)) - return value; - align_to_next_boundary<16, Arg::P1>(p1, p2, count); - } - return generic::Memcmp::loop_and_tail(p1, p2, count); +inline_memcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) { + if (count == 0) + return MemcmpReturnType::zero(); + if (count == 1) + return generic::Memcmp::block(p1, p2); + if (count == 2) + return generic::Memcmp::block(p1, p2); + if (count == 3) + return generic::MemcmpSequence::block(p1, p2); + if (count <= 8) + return generic::Memcmp::head_tail(p1, p2, count); + if (count <= 16) + return generic::Memcmp::head_tail(p1, p2, count); + + return generic::Memcmp::loop_and_tail_align_above(384, p1, p2, + count); } +#if defined(__ARM_NEON) [[maybe_unused]] LIBC_INLINE MemcmpReturnType -inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) { +inline_memcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) { + if (count == 0) + return MemcmpReturnType::zero(); + if (count == 1) + return generic::Memcmp::block(p1, p2); + if (count == 2) + return generic::Memcmp::block(p1, p2); + if (count == 3) + return generic::MemcmpSequence::block(p1, p2); + if (count <= 8) + return generic::Memcmp::head_tail(p1, p2, count); + if (count <= 16) + return generic::Memcmp::head_tail(p1, p2, count); + if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞] if (auto value = generic::Memcmp::block(p1, p2)) return value; @@ -46,25 +69,15 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) { return generic::Memcmp::loop_and_tail(p1 + 32, p2 + 32, count - 32); } +#endif -LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2, - size_t count) { - if (count == 0) - return MemcmpReturnType::zero(); - if (count == 1) - return generic::Memcmp::block(p1, p2); - if (count == 2) - return generic::Memcmp::block(p1, p2); - if (count == 3) - return generic::MemcmpSequence::block(p1, p2); - if (count <= 8) - return generic::Memcmp::head_tail(p1, p2, count); - if (count <= 16) - return generic::Memcmp::head_tail(p1, p2, count); - if constexpr (aarch64::kNeon) - return inline_memcmp_aarch64_neon_gt16(p1, p2, count); - else - return inline_memcmp_generic_gt16(p1, p2, count); +[[gnu::flatten]] LIBC_INLINE MemcmpReturnType +inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) { +#if defined(__ARM_NEON) + return inline_memcmp_aarch64_with_fp(p1, p2, count); +#else + return inline_memcmp_aarch64_no_fp(p1, p2, count); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h index 2b238031af49d..d8d276966fd27 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memmove.h +++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h @@ -8,8 +8,7 @@ #ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon +#include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" #include "src/string/memory_utils/utils.h" @@ -19,7 +18,6 @@ namespace LIBC_NAMESPACE_DECL { LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) { - static_assert(aarch64::kNeon, "aarch64 supports vector types"); using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = generic_v512; diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h index efcbfd0705983..1b4b871792c69 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memset.h +++ b/libc/src/string/memory_utils/aarch64/inline_memset.h @@ -18,12 +18,12 @@ namespace LIBC_NAMESPACE_DECL { +using uint128_t = generic_v128; +using uint256_t = generic_v256; +using uint512_t = generic_v512; + [[maybe_unused]] LIBC_INLINE static void -inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { - static_assert(aarch64::kNeon, "aarch64 supports vector types"); - using uint128_t = generic_v128; - using uint256_t = generic_v256; - using uint512_t = generic_v512; +inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) { if (count == 0) return; if (count <= 3) { @@ -46,15 +46,57 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { generic::Memset::tail(dst, value, count); return; } + + generic::Memset::block(dst, value); + align_to_next_boundary<16>(dst, count); + return generic::Memset::loop_and_tail(dst, value, count); +} + +#if defined(__ARM_NEON) +[[maybe_unused]] LIBC_INLINE static void +inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) { + if (count == 0) + return; + if (count <= 3) { + generic::Memset::block(dst, value); + if (count > 1) + generic::Memset::tail(dst, value, count); + return; + } + if (count <= 8) + return generic::Memset::head_tail(dst, value, count); + if (count <= 16) + return generic::Memset::head_tail(dst, value, count); + if (count <= 32) + return generic::Memset::head_tail(dst, value, count); + if (count <= (32 + 64)) { + generic::Memset::block(dst, value); + if (count <= 64) + return generic::Memset::tail(dst, value, count); + generic::Memset::block(dst + 32, value); + generic::Memset::tail(dst, value, count); + return; + } + if (count >= 448 && value == 0 && aarch64::neon::hasZva()) { generic::Memset::block(dst, 0); align_to_next_boundary<64>(dst, count); return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count); - } else { - generic::Memset::block(dst, value); - align_to_next_boundary<16>(dst, count); - return generic::Memset::loop_and_tail(dst, value, count); } + + generic::Memset::block(dst, value); + align_to_next_boundary<16>(dst, count); + return generic::Memset::loop_and_tail(dst, value, count); +} +#endif + +[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void +inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) { +#if defined(__ARM_NEON) + return inline_memset_aarch64_with_fp(dst, value, count); +#else + return inline_memset_aarch64_no_fp(dst, value, count); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h index 3c1dc808cc5ce..955d764aade2b 100644 --- a/libc/src/string/memory_utils/inline_bcmp.h +++ b/libc/src/string/memory_utils/inline_bcmp.h @@ -21,7 +21,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_bcmp.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_bcmp.h" #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv diff --git a/libc/src/string/memory_utils/inline_memcmp.h b/libc/src/string/memory_utils/inline_memcmp.h index a2ca9afd7f79d..85a614b2fb95e 100644 --- a/libc/src/string/memory_utils/inline_memcmp.h +++ b/libc/src/string/memory_utils/inline_memcmp.h @@ -20,7 +20,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_memcmp.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_memcmp.h" #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h index aed37071265aa..fd9c29ea4410a 100644 --- a/libc/src/string/memory_utils/inline_memset.h +++ b/libc/src/string/memory_utils/inline_memset.h @@ -20,7 +20,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_memset.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_memset.h" #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h index 868c64474c0b4..e552601fbb708 100644 --- a/libc/src/string/memory_utils/op_aarch64.h +++ b/libc/src/string/memory_utils/op_aarch64.h @@ -25,7 +25,6 @@ #ifdef __ARM_NEON #include -#endif //__ARM_NEON namespace LIBC_NAMESPACE_DECL { namespace aarch64 { @@ -176,6 +175,8 @@ template struct Bcmp { } // namespace aarch64 } // namespace LIBC_NAMESPACE_DECL +#endif //__ARM_NEON + namespace LIBC_NAMESPACE_DECL { namespace generic { @@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset) { return MemcmpReturnType::zero(); } +#if defined(__ARM_NEON) + /////////////////////////////////////////////////////////////////////////////// // Specializations for uint8x16_t template <> struct is_vector : cpp::true_type {}; @@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, } return MemcmpReturnType::zero(); } + +#endif // __ARM_NEON + } // namespace generic } // namespace LIBC_NAMESPACE_DECL