From 3f76cdfe25a02df1a4371edae1eb16555be5260a Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Sat, 5 Apr 2025 12:20:55 -0400 Subject: [PATCH 1/6] [libc] Extend fputil::sqrt to use floating point instruction for arm32 CPUs if FPUs are available. --- libc/src/__support/FPUtil/aarch64/sqrt.h | 11 ++++++++--- libc/src/__support/FPUtil/sqrt.h | 2 +- .../__support/macros/properties/cpu_features.h | 18 +++++++++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/libc/src/__support/FPUtil/aarch64/sqrt.h b/libc/src/__support/FPUtil/aarch64/sqrt.h index b69267ff91f5c..cfd3579f621d0 100644 --- a/libc/src/__support/FPUtil/aarch64/sqrt.h +++ b/libc/src/__support/FPUtil/aarch64/sqrt.h @@ -12,8 +12,9 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/cpu_features.h" -#if !defined(LIBC_TARGET_ARCH_IS_AARCH64) +#if !defined(LIBC_TARGET_ARCH_IS_ANY_ARM) #error "Invalid include" #endif @@ -22,17 +23,21 @@ namespace LIBC_NAMESPACE_DECL { namespace fputil { +#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT template <> LIBC_INLINE float sqrt(float x) { float y; - __asm__ __volatile__("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x)); + asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x)); return y; } +#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE template <> LIBC_INLINE double sqrt(double x) { double y; - __asm__ __volatile__("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x)); + asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x)); return y; } +#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE } // namespace fputil } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h index eb86ddfa89d8e..ca9890600168f 100644 --- a/libc/src/__support/FPUtil/sqrt.h +++ b/libc/src/__support/FPUtil/sqrt.h @@ -14,7 +14,7 @@ #if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2) #include 
"x86_64/sqrt.h" -#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) +#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM) #include "aarch64/sqrt.h" #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "riscv/sqrt.h" diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index 1714775ca334d..47dd8a03613e7 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -42,18 +42,30 @@ #define LIBC_TARGET_CPU_HAS_AVX512BW #endif +#if defined(__ARM_FP) +#if (__ARM_FP & 0x2) +#define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF +#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_HALF +#if (__ARM_FP & 0x4) +#define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#if (__ARM_FP & 0x8) +#define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE +#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE +#endif // __ARM_FP + #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \ defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA) #define LIBC_TARGET_CPU_HAS_FMA // Provide a more fine-grained control of FMA instruction for ARM targets. #if defined(__ARM_FP) -#if (__ARM_FP & 0x2) +#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_HALF) #define LIBC_TARGET_CPU_HAS_FMA_HALF #endif // LIBC_TARGET_CPU_HAS_FMA_HALF -#if (__ARM_FP & 0x4) +#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT) #define LIBC_TARGET_CPU_HAS_FMA_FLOAT #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT -#if (__ARM_FP & 0x8) +#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE) #define LIBC_TARGET_CPU_HAS_FMA_DOUBLE #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE #else From 3be70f13494be4eb5c8a6899ae2094a315c259d4 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Sat, 5 Apr 2025 14:11:50 -0400 Subject: [PATCH 2/6] Use __builtin_elementwise_sqrt if available. 
--- libc/src/__support/FPUtil/aarch64/sqrt.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libc/src/__support/FPUtil/aarch64/sqrt.h b/libc/src/__support/FPUtil/aarch64/sqrt.h index cfd3579f621d0..9462fc16f3f4c 100644 --- a/libc/src/__support/FPUtil/aarch64/sqrt.h +++ b/libc/src/__support/FPUtil/aarch64/sqrt.h @@ -25,17 +25,25 @@ namespace fputil { #ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT template <> LIBC_INLINE float sqrt(float x) { +#if __has_builtin(__builtin_elementwise_sqrt) + return __builtin_elementwise_sqrt(x); +#else float y; asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x)); return y; +#endif // __builtin_elementwise_sqrt } #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT #ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE template <> LIBC_INLINE double sqrt(double x) { +#if __has_builtin(__builtin_elementwise_sqrt) + return __builtin_elementwise_sqrt(x); +#else double y; asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x)); return y; +#endif // __builtin_elementwise_sqrt } #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE From b97174a320c43d131faa341871106354daad9ccd Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Sun, 6 Apr 2025 12:10:11 -0400 Subject: [PATCH 3/6] Move __builtin_elementwise_sqrt to generic, and add GPUs, RISCV support. 
--- .../__support/FPUtil/{aarch64 => arm}/sqrt.h | 24 ++++-------- libc/src/__support/FPUtil/riscv/sqrt.h | 11 +++--- libc/src/__support/FPUtil/sqrt.h | 37 +++++++++++++++++-- libc/src/__support/FPUtil/x86_64/sqrt.h | 2 - .../macros/properties/cpu_features.h | 37 +++++++++++++++---- .../llvm-project-overlay/libc/BUILD.bazel | 3 +- 6 files changed, 76 insertions(+), 38 deletions(-) rename libc/src/__support/FPUtil/{aarch64 => arm}/sqrt.h (61%) diff --git a/libc/src/__support/FPUtil/aarch64/sqrt.h b/libc/src/__support/FPUtil/arm/sqrt.h similarity index 61% rename from libc/src/__support/FPUtil/aarch64/sqrt.h rename to libc/src/__support/FPUtil/arm/sqrt.h index 9462fc16f3f4c..39ac5395f869e 100644 --- a/libc/src/__support/FPUtil/aarch64/sqrt.h +++ b/libc/src/__support/FPUtil/arm/sqrt.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H -#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H +#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H +#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H #include "src/__support/common.h" #include "src/__support/macros/config.h" @@ -18,36 +18,26 @@ #error "Invalid include" #endif -#include "src/__support/FPUtil/generic/sqrt.h" - namespace LIBC_NAMESPACE_DECL { namespace fputil { -#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT template <> LIBC_INLINE float sqrt(float x) { -#if __has_builtin(__builtin_elementwise_sqrt) - return __builtin_elementwise_sqrt(x); -#else float y; asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x)); return y; -#endif // __builtin_elementwise_sqrt } -#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT -#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE +#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE template <> LIBC_INLINE double sqrt(double x) { -#if __has_builtin(__builtin_elementwise_sqrt) - return __builtin_elementwise_sqrt(x); -#else double y; asm("fsqrt %d0, 
%d1\n\t" : "=w"(y) : "w"(x)); return y; -#endif // __builtin_elementwise_sqrt } -#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE +#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE } // namespace fputil } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H +#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H diff --git a/libc/src/__support/FPUtil/riscv/sqrt.h b/libc/src/__support/FPUtil/riscv/sqrt.h index 0363822a4e8af..d5d0d9bb5111e 100644 --- a/libc/src/__support/FPUtil/riscv/sqrt.h +++ b/libc/src/__support/FPUtil/riscv/sqrt.h @@ -12,31 +12,30 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/cpu_features.h" #if !defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #error "Invalid include" #endif -#include "src/__support/FPUtil/generic/sqrt.h" - namespace LIBC_NAMESPACE_DECL { namespace fputil { -#ifdef __riscv_flen +#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT template <> LIBC_INLINE float sqrt(float x) { float result; __asm__ __volatile__("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x)); return result; } +#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT -#if __riscv_flen >= 64 +#if LIBC_TARGET_CPU_HAS_FPU_DOUBLE template <> LIBC_INLINE double sqrt(double x) { double result; __asm__ __volatile__("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x)); return result; } -#endif // __riscv_flen >= 64 -#endif // __riscv_flen +#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT } // namespace fputil } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h index ca9890600168f..4819935b8b667 100644 --- a/libc/src/__support/FPUtil/sqrt.h +++ b/libc/src/__support/FPUtil/sqrt.h @@ -12,14 +12,43 @@ #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/cpu_features.h" -#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2) +#include 
"src/__support/FPUtil/generic/sqrt.h" + +// Generic instruction specializations with __builtin_elementwise_sqrt. +#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT) || \ + defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE) + +#if __has_builtin(__builtin_elementwise_sqrt) + +namespace LIBC_NAMESPACE_DECL { +namespace fputil { + +#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT +template <> LIBC_INLINE float sqrt(float x) { + return __builtin_elementwise_sqrt(x); +} +#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT + +#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE +template <> LIBC_INLINE double sqrt(double x) { + return __builtin_elementwise_sqrt(x); +} +#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE + +} // namespace fputil +} // namespace LIBC_NAMESPACE_DECL + +#else +// Use inline assembly when __builtin_elementwise_sqrt is not available. +#if defined(LIBC_TARGET_CPU_HAS_SSE2) #include "x86_64/sqrt.h" #elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM) -#include "aarch64/sqrt.h" +#include "arm/sqrt.h" #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "riscv/sqrt.h" -#else -#include "generic/sqrt.h" + +#endif // __builtin_elementwise_sqrt #endif + #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H diff --git a/libc/src/__support/FPUtil/x86_64/sqrt.h b/libc/src/__support/FPUtil/x86_64/sqrt.h index e10447751d216..eae40cc8829f1 100644 --- a/libc/src/__support/FPUtil/x86_64/sqrt.h +++ b/libc/src/__support/FPUtil/x86_64/sqrt.h @@ -18,8 +18,6 @@ #error "sqrtss / sqrtsd need SSE2" #endif -#include "src/__support/FPUtil/generic/sqrt.h" - namespace LIBC_NAMESPACE_DECL { namespace fputil { diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index 47dd8a03613e7..bdc08e79f173e 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -20,6 +20,8 @@ #if defined(__SSE2__) #define LIBC_TARGET_CPU_HAS_SSE2 +#define LIBC_TARGET_CPU_HAS_FPU_FLOAT +#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE #endif #if 
defined(__SSE4_2__) @@ -45,33 +47,52 @@ #if defined(__ARM_FP) #if (__ARM_FP & 0x2) #define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF +#define LIBC_TARGET_CPU_HAS_FPU_HALF #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_HALF #if (__ARM_FP & 0x4) #define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT +#define LIBC_TARGET_CPU_HAS_FPU_FLOAT #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT #if (__ARM_FP & 0x8) #define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE +#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE #endif // __ARM_FP +#if defined(__riscv_flen) +// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc +#if (__riscv_flen & 0x20) +#define LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF +#define LIBC_TARGET_CPU_HAS_FPU_HALF +#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF +#if (__riscv_flen & 0x40) +#define LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT +#define LIBC_TARGET_CPU_HAS_FPU_FLOAT +#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT +#if (__riscv_flen & 0x80) +#define LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE +#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE +#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE +#endif // __riscv_flen + +#if defined(__NVPTX__) || defined(__AMDGPU__) +#define LIBC_TARGET_CPU_HAS_FPU_FLOAT +#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE +#endif + #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \ defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA) #define LIBC_TARGET_CPU_HAS_FMA // Provide a more fine-grained control of FMA instruction for ARM targets. 
-#if defined(__ARM_FP) -#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_HALF) +#if defined(LIBC_TARGET_CPU_HAS_FPU_HALF) #define LIBC_TARGET_CPU_HAS_FMA_HALF #endif // LIBC_TARGET_CPU_HAS_FMA_HALF -#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT) +#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT) #define LIBC_TARGET_CPU_HAS_FMA_FLOAT #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT -#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE) +#if defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE) #define LIBC_TARGET_CPU_HAS_FMA_DOUBLE #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#else -#define LIBC_TARGET_CPU_HAS_FMA_FLOAT -#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#endif #endif #if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index a9357485d3a10..dcb4d53f9dad0 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1179,7 +1179,7 @@ sqrt_hdrs = selects.with_or({ "src/__support/FPUtil/x86_64/sqrt.h", ], PLATFORM_CPU_ARM64: sqrt_common_hdrs + [ - "src/__support/FPUtil/aarch64/sqrt.h", + "src/__support/FPUtil/arm/sqrt.h", ], }) @@ -1195,6 +1195,7 @@ libc_support_library( ":__support_fputil_fenv_impl", ":__support_fputil_fp_bits", ":__support_fputil_rounding_mode", + ":__support_macros_properties_cpu_features", ":__support_uint128", ], ) From 0590dbad8579ebf12f41c11637e2a4d2cbfd1d2b Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Sun, 6 Apr 2025 12:15:29 -0400 Subject: [PATCH 4/6] Add missing #endif. 
---
 libc/src/__support/FPUtil/sqrt.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 4819935b8b667..bf0a99ff94225 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -46,9 +46,10 @@ template <> LIBC_INLINE double sqrt(double x) {
 #include "arm/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "riscv/sqrt.h"
+#endif // Target specific header of inline asm.
 
 #endif // __builtin_elementwise_sqrt
-#endif
+#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT or DOUBLE
 
 
 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H

From 70927af8c721097881a11e722f69be51dfad124c Mon Sep 17 00:00:00 2001
From: Tue Ly
Date: Sun, 6 Apr 2025 12:24:32 -0400
Subject: [PATCH 5/6] Fix __riscv_flen mask.

__riscv_flen holds the width of the floating point registers in bits (32,
64 or 128); it is not a set of feature bits. Testing it with a mask leaves
LIBC_TARGET_CPU_HAS_FPU_FLOAT undefined whenever the D extension is present
(64 & 0x20 == 0), which in turn drops LIBC_TARGET_CPU_HAS_FMA_FLOAT. Compare
the width with >= so that a wider register file implies the narrower
formats, and detect half precision arithmetic through __riscv_zfh, since no
__riscv_flen value describes it.
---
Note: the blob ids on the index line predate this revision of the hunk;
regenerate the series with git format-patch before sending.

 libc/src/__support/macros/properties/cpu_features.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index bdc08e79f173e..3677e1fc3275c 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -61,15 +61,17 @@
 
 #if defined(__riscv_flen)
 // https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
-#if (__riscv_flen & 0x20)
+// __riscv_flen is the FP register width in bits (32/64/128), not a bitmask;
+// half-precision arithmetic is advertised separately through __riscv_zfh.
+#if defined(__riscv_zfh)
 #define LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
 #define LIBC_TARGET_CPU_HAS_FPU_HALF
 #endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
-#if (__riscv_flen & 0x40)
+#if (__riscv_flen >= 32)
 #define LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
 #define LIBC_TARGET_CPU_HAS_FPU_FLOAT
 #endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
-#if (__riscv_flen & 0x80)
+#if (__riscv_flen >= 64)
 #define LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE
 #define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
 #endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE

From 51014dd71fd2bee31422560a648bc6338b9656b0 Mon Sep 17 00:00:00 2001
From: Tue Ly
Date: Tue, 8 Apr 2025 19:17:43 -0400
Subject: 
[PATCH 6/6] Address comments.

Also guard the RISC-V double precision specialization with #ifdef.
LIBC_TARGET_CPU_HAS_FPU_DOUBLE is defined without a value, so testing it
with a plain #if expands to an empty expression and fails to preprocess
exactly when the macro is defined. The comment on the closing #endif named
the FLOAT macro; make it name the DOUBLE macro it actually closes.
---
Note: the blob ids on the index lines predate this revision of the hunks;
regenerate the series with git format-patch before sending.

 libc/src/__support/FPUtil/riscv/sqrt.h | 8 ++++----
 libc/src/__support/FPUtil/sqrt.h       | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/libc/src/__support/FPUtil/riscv/sqrt.h b/libc/src/__support/FPUtil/riscv/sqrt.h
index d5d0d9bb5111e..694451d02e2b1 100644
--- a/libc/src/__support/FPUtil/riscv/sqrt.h
+++ b/libc/src/__support/FPUtil/riscv/sqrt.h
@@ -24,18 +24,18 @@ namespace fputil {
 #ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
 template <> LIBC_INLINE float sqrt(float x) {
   float result;
-  __asm__ __volatile__("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x));
+  asm("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x));
   return result;
 }
 #endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
 
-#if LIBC_TARGET_CPU_HAS_FPU_DOUBLE
+#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
 template <> LIBC_INLINE double sqrt(double x) {
   double result;
-  __asm__ __volatile__("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x));
+  asm("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x));
   return result;
 }
-#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
+#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
 
 } // namespace fputil
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index bf0a99ff94225..9b151c4c5e1b3 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -38,7 +38,7 @@ template <> LIBC_INLINE double sqrt(double x) {
 } // namespace fputil
 } // namespace LIBC_NAMESPACE_DECL
 
-#else
+#else // __builtin_elementwise_sqrt
 // Use inline assembly when __builtin_elementwise_sqrt is not available.
 #if defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"