-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[libc] Provide more fine-grained control of FMA instruction for ARM targets. #130700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-libc Author: None (lntue) Changes: Patch is 42.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130700.diff 41 Files Affected:
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index 1e40d06dc1462..2cafb4c0974e3 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -24,6 +24,8 @@ LIBC_INLINE OutType fma(InType x, InType y, InType z) {
}
#ifdef LIBC_TARGET_CPU_HAS_FMA
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
template <> LIBC_INLINE float fma(float x, float y, float z) {
#if __has_builtin(__builtin_elementwise_fma)
return __builtin_elementwise_fma(x, y, z);
@@ -31,7 +33,9 @@ template <> LIBC_INLINE float fma(float x, float y, float z) {
return __builtin_fmaf(x, y, z);
#endif
}
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
template <> LIBC_INLINE double fma(double x, double y, double z) {
#if __has_builtin(__builtin_elementwise_fma)
return __builtin_elementwise_fma(x, y, z);
@@ -39,6 +43,7 @@ template <> LIBC_INLINE double fma(double x, double y, double z) {
return __builtin_fma(x, y, z);
#endif
}
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#endif // LIBC_TARGET_CPU_HAS_FMA
} // namespace fputil
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index b24ffd4aa456f..c27885aadc028 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -100,6 +100,26 @@ LIBC_INLINE NumberPair<T> exact_mult(const NumberPair<T> &as, T a, T b) {
return r;
}
+// The templated exact multiplication needs template version of
+// LIBC_TARGET_CPU_HAS_FMA_* macro to correctly select the implementation.
+// These can be moved to "src/__support/macros/properties/cpu_features.h" if
+// other parts of libc need them.
+template <typename T> struct TargetHasFmaInstruction {
+ static constexpr bool VALUE = false;
+};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+template <> struct TargetHasFmaInstruction<float> {
+ static constexpr bool VALUE = true;
+};
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+template <> struct TargetHasFmaInstruction<double> {
+ static constexpr bool VALUE = true;
+};
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
// Note: When FMA instruction is not available, the `exact_mult` function is
// only correct for round-to-nearest mode. See:
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
@@ -111,15 +131,15 @@ template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
NumberPair<T> r{0.0, 0.0};
-#ifdef LIBC_TARGET_CPU_HAS_FMA
- r.hi = a * b;
- r.lo = fputil::multiply_add(a, b, -r.hi);
-#else
- // Dekker's Product.
- NumberPair<T> as = split(a);
+ if constexpr (TargetHasFmaInstruction<T>::VALUE) {
+ r.hi = a * b;
+ r.lo = fputil::multiply_add(a, b, -r.hi);
+ } else {
+ // Dekker's Product.
+ NumberPair<T> as = split(a);
- r = exact_mult<T, SPLIT_B>(as, a, b);
-#endif // LIBC_TARGET_CPU_HAS_FMA
+ r = exact_mult<T, SPLIT_B>(as, a, b);
+ }
return r;
}
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index ae00e08673d08..8260702e2c9f4 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -46,6 +46,7 @@ multiply_add(T x, T y, T z) {
namespace LIBC_NAMESPACE_DECL {
namespace fputil {
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
LIBC_INLINE float multiply_add(float x, float y, float z) {
#if __has_builtin(__builtin_elementwise_fma)
return __builtin_elementwise_fma(x, y, z);
@@ -53,7 +54,9 @@ LIBC_INLINE float multiply_add(float x, float y, float z) {
return __builtin_fmaf(x, y, z);
#endif
}
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
LIBC_INLINE double multiply_add(double x, double y, double z) {
#if __has_builtin(__builtin_elementwise_fma)
return __builtin_elementwise_fma(x, y, z);
@@ -61,6 +64,7 @@ LIBC_INLINE double multiply_add(double x, double y, double z) {
return __builtin_fma(x, y, z);
#endif
}
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index d2cea367516db..1714775ca334d 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -45,6 +45,21 @@
#if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \
defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
#define LIBC_TARGET_CPU_HAS_FMA
+// Provide a more fine-grained control of FMA instruction for ARM targets.
+#if defined(__ARM_FP)
+#if (__ARM_FP & 0x2)
+#define LIBC_TARGET_CPU_HAS_FMA_HALF
+#endif // LIBC_TARGET_CPU_HAS_FMA_HALF
+#if (__ARM_FP & 0x4)
+#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
+#if (__ARM_FP & 0x8)
+#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+#else
+#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
+#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+#endif
#endif
#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \
diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp
index 3a89def8f6e0c..b54a9e7b2b00b 100644
--- a/libc/src/math/generic/asinf.cpp
+++ b/libc/src/math/generic/asinf.cpp
@@ -74,12 +74,12 @@ LLVM_LIBC_FUNCTION(float, asinf, (float x)) {
// |x| < 2^-125. For targets without FMA instructions, we simply use
// double for intermediate results as it is more efficient than using an
// emulated version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(x, 0x1.0p-25f, x);
#else
double xd = static_cast<double>(x);
return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
// Check for exceptional values
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index 5ac2b29438ea9..726cae9c8462b 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -131,7 +131,7 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
num_r = num_d;
den_r = den_d;
}
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
q.lo = fputil::multiply_add(q.hi, -den_r, num_r) / den_r;
#else
// Compute `(num_r - q.hi * den_r) / den_r` accurately without FMA
@@ -140,7 +140,7 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
double t1 = fputil::multiply_add(q_hi_dd.hi, -den_r, num_r); // Exact
double t2 = fputil::multiply_add(q_hi_dd.lo, -den_r, t1);
q.lo = t2 / den_r;
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
// Taylor polynomial, evaluating using Horner's scheme:
// P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15
diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp
index 5e0788efbeb88..46196dbe4162c 100644
--- a/libc/src/math/generic/atanf.cpp
+++ b/libc/src/math/generic/atanf.cpp
@@ -52,12 +52,12 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
return x;
// x <= 2^-12;
if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(x, -0x1.0p-25f, x);
#else
double x_d = static_cast<double>(x);
return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
// Use Taylor polynomial:
// atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp
index ee7d69b2c211f..ce227e6650c84 100644
--- a/libc/src/math/generic/cbrt.cpp
+++ b/libc/src/math/generic/cbrt.cpp
@@ -58,7 +58,7 @@ double intial_approximation(double x) {
// Get the error term for Newton iteration:
// h(x) = x^3 * a^2 - 1,
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) {
return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) +
fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo);
diff --git a/libc/src/math/generic/cos.cpp b/libc/src/math/generic/cos.cpp
index 568b1254c6f02..b60082bf9c308 100644
--- a/libc/src/math/generic/cos.cpp
+++ b/libc/src/math/generic/cos.cpp
@@ -20,11 +20,11 @@
#include "src/math/generic/range_reduction_double_common.h"
#include "src/math/generic/sincos_eval.h"
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#include "range_reduction_double_fma.h"
#else
#include "range_reduction_double_nofma.h"
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp
index 972ffa923aedf..23e3db067e669 100644
--- a/libc/src/math/generic/cosf.cpp
+++ b/libc/src/math/generic/cosf.cpp
@@ -101,11 +101,11 @@ LLVM_LIBC_FUNCTION(float, cosf, (float x)) {
// |x| < 2^-125. For targets without FMA instructions, we simply use
// double for intermediate results as it is more efficient than using an
// emulated version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f);
#else
return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0));
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
if (auto r = COSF_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value()))
diff --git a/libc/src/math/generic/cospif.cpp b/libc/src/math/generic/cospif.cpp
index 4ef1539539921..29566f4fceacf 100644
--- a/libc/src/math/generic/cospif.cpp
+++ b/libc/src/math/generic/cospif.cpp
@@ -50,11 +50,11 @@ LLVM_LIBC_FUNCTION(float, cospif, (float x)) {
// The exhautive test passes for smaller values
if (LIBC_UNLIKELY(x_abs < 0x38A2'F984U)) {
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f);
#else
return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0));
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
// Numbers greater or equal to 2^23 are always integers or NaN
diff --git a/libc/src/math/generic/exp10f16.cpp b/libc/src/math/generic/exp10f16.cpp
index 006dd5c554428..f2002e9f146c0 100644
--- a/libc/src/math/generic/exp10f16.cpp
+++ b/libc/src/math/generic/exp10f16.cpp
@@ -26,7 +26,7 @@
namespace LIBC_NAMESPACE_DECL {
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
static constexpr size_t N_EXP10F16_EXCEPTS = 5;
#else
static constexpr size_t N_EXP10F16_EXCEPTS = 8;
@@ -44,7 +44,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS>
{0xbf0aU, 0x2473U, 1U, 0U, 0U},
// x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ)
{0xc387U, 0x09a5U, 1U, 0U, 0U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ)
{0x4030U, 0x57c1U, 1U, 0U, 1U},
// x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ)
diff --git a/libc/src/math/generic/exp10m1f16.cpp b/libc/src/math/generic/exp10m1f16.cpp
index 449aedf254ca5..41e2c2bb14b04 100644
--- a/libc/src/math/generic/exp10m1f16.cpp
+++ b/libc/src/math/generic/exp10m1f16.cpp
@@ -34,7 +34,7 @@ static constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{
{0x9788U, 0x9c53U, 0U, 1U, 0U},
}};
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3;
#else
static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6;
@@ -49,7 +49,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI>
{0x3657U, 0x3df6U, 1U, 0U, 0U},
// x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ)
{0x3741U, 0x3f5cU, 1U, 0U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ)
{0x4030U, 0x57b1U, 1U, 0U, 1U},
// x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ)
diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp
index 2c612777c9cb5..726f88b6457fc 100644
--- a/libc/src/math/generic/exp2.cpp
+++ b/libc/src/math/generic/exp2.cpp
@@ -35,11 +35,11 @@ using LIBC_NAMESPACE::operator""_u128;
// Error bounds:
// Errors when using double precision.
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
constexpr double ERR_D = 0x1.0p-63;
#else
constexpr double ERR_D = 0x1.8p-63;
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Errors when using double-double precision.
diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp
index 6a1cd2328a050..eceb76f1893e2 100644
--- a/libc/src/math/generic/exp2m1f16.cpp
+++ b/libc/src/math/generic/exp2m1f16.cpp
@@ -40,7 +40,7 @@ static constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
{0x973fU, 0x9505U, 0U, 1U, 0U},
}};
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
#else
static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
@@ -51,13 +51,13 @@ static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
// (input, RZ output, RU offset, RD offset, RN offset)
// x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
{0x3396U, 0x31b7U, 1U, 0U, 0U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
{0x34baU, 0x3345U, 1U, 0U, 0U},
#endif
// x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
{0x36b6U, 0x3566U, 1U, 0U, 0U},
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
{0x37b7U, 0x3659U, 1U, 0U, 1U},
#endif
@@ -67,7 +67,7 @@ static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
{0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
// x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
{0xb8a5U, 0xb54cU, 0U, 1U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
{0xba8dU, 0xb6edU, 0U, 1U, 1U},
#endif
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index d5e9e85ed4bd3..655f0e6246676 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -38,14 +38,14 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
return 0x1.8dbe62p-3f;
}
-#if !defined(LIBC_TARGET_CPU_HAS_FMA)
+#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
int round_mode = fputil::quick_get_round();
if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
return -0x1.71c884p-4f;
return -0x1.71c882p-4f;
}
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
// When |x| > 25*log(2), or nan
if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) {
@@ -102,12 +102,12 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// 2^-76. For targets without FMA instructions, we simply use double for
// intermediate results as it is more efficient than using an emulated
// version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
- return fputil::fma<float>(x, x, x);
+#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+ return fputil::multiply_add<float>(x, x, x);
#else
double xd = x;
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
constexpr double COEFFS[] = {0x1p-1,
diff --git a/libc/src/math/generic/expm1f16.cpp b/libc/src/math/generic/expm1f16.cpp
index 4ce0efd1f461b..bfd263eaa9cb0 100644
--- a/libc/src/math/generic/expm1f16.cpp
+++ b/libc/src/math/generic/expm1f16.cpp
@@ -29,7 +29,7 @@ static constexpr fputil::ExceptValues<float16, 1> EXPM1F16_EXCEPTS_LO = {{
{0x2959U, 0x2975U, 1U, 0U, 1U},
}};
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 2;
#else
static constexpr size_t N_EXPM1F16_EXCEPTS_HI = 3;
@@ -42,7 +42,7 @@ static constexpr fputil::ExceptValues<float16, N_EXPM1F16_EXCEPTS_HI>
{0x3f0dU, 0x44d3U, 1U, 0U, 1U},
// x = -0x1.e28p-3, expm1f16(x) = -0x1.adcp-3 (RZ)
{0xb38aU, 0xb2b7U, 0U, 1U, 1U},
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.a08p-3, exp10m1f(x) = 0x1.cdcp-3 (RZ)
{0x3282U, 0x3337U, 1U, 0U, 0U},
#endif
diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index e759e48cd6989..daad64873f27a 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -21,7 +21,7 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
// correctly rounded for all rounding modes, so we fall
// back to the generic `fmul` implementation
-#ifndef LIBC_TARGET_CPU_HAS_FMA
+#ifndef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
return fputil::generic::mul<float>(x, y);
#else
fputil::DoubleDouble prod = fputil::exact_mult(x, y);
diff --git a/libc/src/math/generic/hypotf.cpp b/libc/src/math/generic/hypotf.cpp
index 959c0420ae149..ec48f62163a48 100644
--- a/libc/src/math/generic/hypotf.cpp
+++ b/libc/src/math/generic/hypotf.cpp
@@ -55,7 +55,7 @@ LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
// These squares are exact.
double a_sq = ad * ad;
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
double sum_sq = fputil::multiply_add(bd, bd, a_sq);
#else
double b_sq = bd * bd;
@@ -72,7 +72,7 @@ LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
double r_d = result.get_val();
// Perform rounding correction.
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq);
double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
#else
diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp
index 04eebab975cd5..0cd4424ee0baf 100644
--- a/libc/src/math/generic/log.cpp
+++ b/libc/src/math/generic/log.cpp
@@ -800,13 +800,13 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
fputil::DoubleDouble r1;
// Perform exact range reduction
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
u = fputil::multiply_add(r, m, -1.0); // exact
#else
uint64_t c_m = x_m & 0x3FFF'E000'0000'0000ULL;
double c = FPBits_t(c_m).get_val();
u = fputil::multiply_add(r, m - c, CD[index]); // exact
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
// Exact sum:
// r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp
index fd8d5a8aae938..1c4e559ba083c 100644
--- a/libc/src/math/generic/log10.cpp
+++ b/libc/src/math/generic/log10.cpp
@@ -802,13 +802,13 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) {
fputil::DoubleDouble r1;
// Perform exact range reduction
-#ifdef LIBC_TARGET_CPU_HAS_FMA
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
u = fputil::multiply_add(r, m, -1.0); // exact
#else
uint64_t c_m = x_m & 0x3FFF'E000'0000'0000ULL;
double c = FPBits_t(c_m).get_val();
u = fputil::multiply_add(r, m - c, CD[index]); // exact
-#endif // LIBC_TARGET_CPU_HAS_FMA
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
// Error of u_sq = ulp(u^2);
u_sq = u * u;
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp
index c635fa4ef9b63..73ca26374e4a3 100644
--- a/libc/src/math/generic/log10f.cpp
+++ b/libc/src/math/generic/log10f.cpp
@@ -145,7 +145,7 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
return fputil::round_result_slightly_up(-0x1.dd2...
[truncated]
|
jhuber6
approved these changes
Mar 11, 2025
Contributor
jhuber6
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems pretty mechanical, guessing this is for architectures that may have one and not the other.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.