-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[libc][math] Add float-only implementation for sinf / cosf. #161680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libc Author: None (lntue) ChangesBased on the double precision's sin/cos fast path algorithm: Step 1: Perform range reduction Step 2: Polynomial approximation Step 3: Combine the results using trig identities Overall errors: <= 3 ULPs for default rounding modes (tested exhaustively). Current limitation: large range reduction requires FMA instructions for binary32. This restriction will be removed in the followup PR. Patch is 25.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161680.diff 13 Files Affected:
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index 8e54e845de493..3913f7acb7f8c 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -77,9 +77,9 @@ LIBC_INLINE constexpr NumberPair<T> split(T a) {
NumberPair<T> r{0.0, 0.0};
// CN = 2^N.
constexpr T CN = static_cast<T>(1 << N);
- constexpr T C = CN + 1.0;
- double t1 = C * a;
- double t2 = a - t1;
+ constexpr T C = CN + T(1);
+ T t1 = C * a;
+ T t2 = a - t1;
r.hi = t1 + t2;
r.lo = a - r.hi;
return r;
@@ -144,8 +144,9 @@ LIBC_INLINE NumberPair<T> exact_mult(T a, T b) {
return r;
}
-LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
- DoubleDouble r = exact_mult(a, b.hi);
+template <typename T = double>
+LIBC_INLINE NumberPair<T> quick_mult(T a, const NumberPair<T> &b) {
+ NumberPair<T> r = exact_mult(a, b.hi);
r.lo = multiply_add(a, b.lo, r.lo);
return r;
}
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 98f9bb42f91f4..38632f0568139 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -833,6 +833,7 @@ add_header_library(
sincosf_utils
HDRS
sincosf_utils.h
+ sincosf_float_eval.h
DEPENDS
.range_reduction
libc.src.__support.FPUtil.fp_bits
diff --git a/libc/src/__support/math/cosf.h b/libc/src/__support/math/cosf.h
index 074be0b314637..6749fddf676b4 100644
--- a/libc/src/__support/math/cosf.h
+++ b/libc/src/__support/math/cosf.h
@@ -9,7 +9,6 @@
#ifndef LIBC_SRC___SUPPORT_MATH_COSF_H
#define LIBC_SRC___SUPPORT_MATH_COSF_H
-#include "sincosf_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/except_value_utils.h"
@@ -18,6 +17,26 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \
+ defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+
+#include "sincosf_float_eval.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+LIBC_INLINE static constexpr float cosf(float x) {
+ return sincosf_float_eval::sincosf_eval</*IS_SIN*/ false>(x);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#else // !LIBC_MATH_COSF_FLOAT_ONLY
+
+#include "sincosf_utils.h"
+
namespace LIBC_NAMESPACE_DECL {
namespace math {
@@ -51,7 +70,6 @@ LIBC_INLINE static constexpr float cosf(float x) {
xbits.set_sign(Sign::POS);
uint32_t x_abs = xbits.uintval();
- double xd = static_cast<double>(xbits.get_val());
// Range reduction:
// For |x| > pi/16, we perform range reduction as follows:
@@ -90,6 +108,7 @@ LIBC_INLINE static constexpr float cosf(float x) {
// computed using degree-7 and degree-6 minimax polynomials generated by
// Sollya respectively.
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// |x| < 0x1.0p-12f
if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
// When |x| < 2^-12, the relative error of the approximation cos(x) ~ 1
@@ -108,12 +127,12 @@ LIBC_INLINE static constexpr float cosf(float x) {
// emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
return fputil::multiply_add(xbits.get_val(), -0x1.0p-25f, 1.0f);
-#else
+#else // !LIBC_TARGET_CPU_HAS_FMA_FLOAT
+ double xd = static_cast<double>(xbits.get_val());
return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, 1.0));
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (auto r = COSF_EXCEPTS.lookup(x_abs); LIBC_UNLIKELY(r.has_value()))
return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -132,6 +151,7 @@ LIBC_INLINE static constexpr float cosf(float x) {
return x + FPBits::quiet_nan().get_val();
}
+ double xd = static_cast<double>(xbits.get_val());
// Combine the results with the sine of sum formula:
// cos(x) = cos((k + y)*pi/32)
// = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
@@ -150,3 +170,5 @@ LIBC_INLINE static constexpr float cosf(float x) {
} // namespace LIBC_NAMESPACE_DECL
#endif // LIBC_SRC___SUPPORT_MATH_COSF_H
+
+#endif // LIBC_MATH_COSF_FLOAT_ONLY
diff --git a/libc/src/__support/math/sincosf_float_eval.h b/libc/src/__support/math/sincosf_float_eval.h
new file mode 100644
index 0000000000000..2655343d32c4e
--- /dev/null
+++ b/libc/src/__support/math/sincosf_float_eval.h
@@ -0,0 +1,223 @@
+//===-- Compute sin + cos for small angles ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace sincosf_float_eval {
+
+// Since the worst case of `x mod pi` in single precision is > 2^-28, in order
+// to be bounded by 1 ULP, the range reduction accuracy will need to be at
+// lest 2^(-28 - 23) = 2^-51.
+// For fast small range reduction, we will compute as follow:
+// Let pi ~ c0 + c1 + c2
+// with |c1| < ulp(c0)/2 and |c2| < ulp(c1)/2
+// then:
+// k := nearest_int(x * 1/pi);
+// u = (x - k * c0) - k * c1 - k * c2
+// We requires k * c0, k * c1 to be exactly representable in single precision.
+// Let p_k be the precision of k, then the precision of c0 and c1 are:
+// 24 - p_k,
+// and the ulp of (k * c2) is 2^(-3 * (24 - p_k)).
+// This give us the following bound on the precision of k:
+// 3 * (24 - p_k) >= 51,
+// or equivalently:
+// p_k <= 7.
+// We set the bound for p_k to be 6 so that we can have some more wiggle room
+// for computations.
+LIBC_INLINE static unsigned sincosf_range_reduction_small(float x, float &u) {
+ // > display=hexadecimal;
+ // > a = round(pi/8, 18, RN);
+ // > b = round(pi/8 - a, 18, RN);
+ // > c = round(pi/8 - a - b, SG, RN);
+ // > round(8/pi, SG, RN);
+ constexpr float MPI[3] = {-0x1.921f8p-2f, -0x1.aa22p-21f, -0x1.68c234p-41f};
+ constexpr float ONE_OVER_PI = 0x1.45f306p+1f;
+ float prod_hi = x * ONE_OVER_PI;
+ float k = fputil::nearest_integer(prod_hi);
+
+ float y_hi = fputil::multiply_add(k, MPI[0], x); // Exact
+ u = fputil::multiply_add(k, MPI[1], y_hi);
+ u = fputil::multiply_add(k, MPI[2], u);
+ return static_cast<unsigned>(static_cast<int>(k));
+}
+
+// TODO: Add non-FMA version of large range reduction.
+LIBC_INLINE static unsigned sincosf_range_reduction_large(float x, float &u) {
+ // > for i from 0 to 13 do {
+ // if i < 2 then { pi_inv = 0.25 + 2^(8*(i - 2)) / pi; }
+ // else { pi_inv = 2^(8*(i-2)) / pi; };
+ // pn = nearestint(pi_inv);
+ // pi_frac = pi_inv - pn;
+ // a = round(pi_frac, SG, RN);
+ // b = round(pi_frac - a, SG, RN);
+ // c = round(pi_frac - a - b, SG, RN);
+ // d = round(pi_frac - a - b - c, SG, RN);
+ // print("{", 2^3 * a, ",", 2^3 * b, ",", 2^3 * c, ",", 2^3 * d, "},");
+ // };
+ constexpr float EIGHT_OVER_PI[14][4] = {
+ {0x1.000146p1f, -0x1.9f246cp-28f, -0x1.bbead6p-54f, -0x1.ec5418p-85f},
+ {0x1.0145f4p1f, -0x1.f246c6p-24f, -0x1.df56bp-49f, -0x1.ec5418p-77f},
+ {0x1.45f306p1f, 0x1.b9391p-24f, 0x1.529fc2p-50f, 0x1.d5f47ep-76f},
+ {0x1.f306dcp1f, 0x1.391054p-24f, 0x1.4fe13ap-49f, 0x1.7d1f54p-74f},
+ {-0x1.f246c6p0f, -0x1.df56bp-25f, -0x1.ec5418p-53f, 0x1.f534dep-78f},
+ {-0x1.236378p1f, 0x1.529fc2p-26f, 0x1.d5f47ep-52f, -0x1.65912p-77f},
+ {0x1.391054p0f, 0x1.4fe13ap-25f, 0x1.7d1f54p-50f, -0x1.6447e4p-75f},
+ {0x1.1054a8p0f, -0x1.ec5418p-29f, 0x1.f534dep-54f, -0x1.f924ecp-81f},
+ {0x1.529fc2p-2f, 0x1.d5f47ep-28f, -0x1.65912p-53f, 0x1.b6c52cp-79f},
+ {-0x1.ac07b2p1f, 0x1.5f47d4p-24f, 0x1.a6ee06p-49f, 0x1.b6295ap-74f},
+ {-0x1.ec5418p-5f, 0x1.f534dep-30f, -0x1.f924ecp-57f, 0x1.5993c4p-82f},
+ {0x1.3abe9p-1f, -0x1.596448p-27f, 0x1.b6c52cp-55f, -0x1.9b0ef2p-80f},
+ {-0x1.505c16p1f, 0x1.a6ee06p-25f, 0x1.b6295ap-50f, -0x1.b0ef1cp-76f},
+ {-0x1.70565ap-1f, 0x1.dc0db6p-26f, 0x1.4acc9ep-53f, 0x1.0e4108p-80f},
+ };
+
+ using FPBits = typename fputil::FPBits<float>;
+ using fputil::FloatFloat;
+ FPBits xbits(x);
+
+ int x_e_m32 = xbits.get_biased_exponent() - (FPBits::EXP_BIAS + 32);
+ unsigned idx = static_cast<unsigned>((x_e_m32 >> 3) + 2);
+ // Scale x down by 2^(-(8 * (idx - 2))
+ xbits.set_biased_exponent((x_e_m32 & 7) + FPBits::EXP_BIAS + 32);
+ // 2^32 <= |x_reduced| < 2^(32 + 8) = 2^40
+ float x_reduced = xbits.get_val();
+ // x * c_hi = ph.hi + ph.lo exactly.
+ FloatFloat ph = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][0]);
+ // x * c_mid = pm.hi + pm.lo exactly.
+ FloatFloat pm = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][1]);
+ // x * c_lo = pl.hi + pl.lo exactly.
+ FloatFloat pl = fputil::exact_mult<float>(x_reduced, EIGHT_OVER_PI[idx][2]);
+ // Extract integral parts and fractional parts of (ph.lo + pm.hi).
+ float sum_hi = ph.lo + pm.hi;
+ float k = fputil::nearest_integer(sum_hi);
+
+ // x * 8/pi mod 1 ~ y_hi + y_mid + y_lo
+ float y_hi = (ph.lo - k) + pm.hi; // Exact
+ FloatFloat y_mid = fputil::exact_add(pm.lo, pl.hi);
+ float y_lo = pl.lo;
+
+ // y_l = x * c_lo_2 + pl.lo
+ float y_l = fputil::multiply_add(x_reduced, EIGHT_OVER_PI[idx][3], y_lo);
+ FloatFloat y = fputil::exact_add(y_hi, y_mid.hi);
+ y.lo += (y_mid.lo + y_l);
+
+ // Digits of pi/8, generated by Sollya with:
+ // > a = round(pi/8, SG, RN);
+ // > b = round(pi/8 - SG, D, RN);
+ constexpr FloatFloat PI_OVER_8 = {-0x1.777a5cp-27f, 0x1.921fb6p-2f};
+
+ // Error bound: with {a} denote the fractional part of a, i.e.:
+ // {a} = a - round(a)
+ // Then,
+ // | {x * 8/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-47
+ // | {x mod pi/8} - (u.hi + u.lo) | < 2 * 2^-5 * 2^-47 = 2^-51
+ u = fputil::multiply_add(y.hi, PI_OVER_8.hi, y.lo * PI_OVER_8.hi);
+
+ return static_cast<unsigned>(static_cast<int>(k));
+}
+
+template <bool IS_SIN> LIBC_INLINE static float sincosf_eval(float x) {
+ // sin(k * pi/8) for k = 0..15, generated by Sollya with:
+ // > for k from 0 to 16 do {
+ // print(round(sin(k * pi/8), SG, RN));
+ // };
+ constexpr float SIN_K_PI_OVER_8[16] = {
+ 0.0f, 0x1.87de2ap-2f, 0x1.6a09e6p-1f, 0x1.d906bcp-1f,
+ 1.0f, 0x1.d906bcp-1f, 0x1.6a09e6p-1f, 0x1.87de2ap-2f,
+ 0.0f, -0x1.87de2ap-2f, -0x1.6a09e6p-1f, -0x1.d906bcp-1f,
+ -1.0f, -0x1.d906bcp-1f, -0x1.6a09e6p-1f, -0x1.87de2ap-2f,
+ };
+
+ using FPBits = fputil::FPBits<float>;
+ FPBits xbits(x);
+ uint32_t x_abs = cpp::bit_cast<uint32_t>(x) & 0x7fff'ffffU;
+
+ float y;
+ unsigned k = 0;
+ if (x_abs < 0x4880'0000U) {
+ k = sincosf_range_reduction_small(x, y);
+ } else {
+
+ if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ if (x_abs == 0x7f80'0000U) {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ }
+ return x + FPBits::quiet_nan().get_val();
+ }
+
+ k = sincosf_range_reduction_large(x, y);
+ }
+
+ float sin_k = SIN_K_PI_OVER_8[k & 15];
+ // cos(k * pi/8) = sin(k * pi/8 + pi/2) = sin((k + 4) * pi/8).
+ // cos_k = cos(k * pi/8)
+ float cos_k = SIN_K_PI_OVER_8[(k + 4) & 15];
+
+ float y_sq = y * y;
+
+ // Polynomial approximation of sin(y) and cos(y) for |y| <= pi/16:
+ //
+ // Using Taylor polynomial for sin(y):
+ // sin(y) ~ y - y^3 / 6 + y^5 / 120
+ // Using minimax polynomial generated by Sollya for cos(y) with:
+ // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]);
+ //
+ // Error bounds:
+ // * For sin(y)
+ // > P = x - SG(1/6)*x^3 + SG(1/120) * x^5;
+ // > dirtyinfnorm((sin(x) - P)/sin(x), [-pi/16, pi/16]);
+ // 0x1.825...p-27
+ // * For cos(y)
+ // > Q = fpminimax(cos(x), [|0, 2, 4|], [|1, SG...|], [0, pi/16]);
+ // > dirtyinfnorm((sin(x) - P)/sin(x), [-pi/16, pi/16]);
+ // 0x1.aa8...p-29
+
+ // p1 = y^2 * 1/120 - 1/6
+ float p1 = fputil::multiply_add(y_sq, 0x1.111112p-7f, -0x1.555556p-3f);
+ // q1 = y^2 * coeff(Q, 4) + coeff(Q, 2)
+ float q1 = fputil::multiply_add(y_sq, 0x1.54b8bep-5f, -0x1.ffffc4p-2f);
+ float y3 = y_sq * y;
+ // c1 ~ cos(y)
+ float c1 = fputil::multiply_add(y_sq, q1, 1.0f);
+ // s1 ~ sin(y)
+ float s1 = fputil::multiply_add(y3, p1, y);
+
+ if constexpr (IS_SIN) {
+ // sin(x) = cos(k * pi/8) * sin(y) + sin(k * pi/8) * cos(y).
+ return fputil::multiply_add(cos_k, s1, sin_k * c1);
+ } else {
+ // cos(x) = cos(k * pi/8) * cos(y) - sin(k * pi/8) * sin(y).
+ return fputil::multiply_add(cos_k, c1, -sin_k * s1);
+ }
+}
+
+} // namespace sincosf_float_eval
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_FLOAT_EVAL_H
diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp
index a8e634ce2f5ac..28759c1402c3e 100644
--- a/libc/src/math/generic/sinf.cpp
+++ b/libc/src/math/generic/sinf.cpp
@@ -17,13 +17,30 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) && \
+ defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
+
+#include "sincosf_float_eval.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
+ return math::sincosf_float_eval::sincosf_eval</*IS_SIN*/ true>(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#else // !LIBC_MATH_SINF_FLOAT_ONLY
+
#include "src/__support/math/sincosf_utils.h"
-#if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#include "src/__support/math/range_reduction_fma.h"
-#else
+#else // !LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#include "src/__support/math/range_reduction.h"
-#endif
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
namespace LIBC_NAMESPACE_DECL {
@@ -162,3 +179,4 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
}
} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_MATH_SINF_FLOAT_ONLY
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 2d2d5287bb384..3480ea7e1c838 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -16,6 +16,20 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ cosf_float_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ cosf_float_test.cpp
+ DEPENDS
+ libc.src.__support.math.sincosf_utils
+ libc.src.__support.FPUtil.fp_bits
+ FLAGS
+ FMA_OPT__ONLY
+)
+
add_fp_unittest(
cos_test
NEED_MPFR
@@ -96,6 +110,20 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ sinf_float_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ sinf_float_test.cpp
+ DEPENDS
+ libc.src.__support.math.sincosf_utils
+ libc.src.__support.FPUtil.fp_bits
+ FLAGS
+ FMA_OPT__ONLY
+)
+
add_fp_unittest(
sinf16_test
NEED_MPFR
diff --git a/libc/test/src/math/cosf_float_test.cpp b/libc/test/src/math/cosf_float_test.cpp
new file mode 100644
index 0000000000000..3d573b211f4b4
--- /dev/null
+++ b/libc/test/src/math/cosf_float_test.cpp
@@ -0,0 +1,35 @@
+//===-- Unittests for cosf float-only -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include "hdr/stdint_proxy.h"
+
+using LlvmLibcCosfFloatTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+float cosf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ false>(x);
+}
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcCosfFloatTest, InFloatRange) {
+ constexpr uint32_t COUNT = 100'000;
+ constexpr uint32_t STEP = UINT32_MAX / COUNT;
+ for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+ float x = FPBits(v).get_val();
+ if (FPBits(v).is_nan() || FPBits(v).is_inf())
+ continue;
+ ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cosf_fast(x), 3.5);
+ }
+}
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 07c36f424a0c3..4720211116df4 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -42,6 +42,21 @@ add_fp_unittest(
-lpthread
)
+add_fp_unittest(
+ sinf_float_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ sinf_float_test.cpp
+ LINK_LIBRARIES
+ -lpthread
+ DEPENDS
+ .exhaustive_test
+ libc.src.__support.math.sincosf_utils
+)
+
add_fp_unittest(
sinpif_test
NO_RUN_POSTBUILD
@@ -74,6 +89,21 @@ add_fp_unittest(
-lpthread
)
+add_fp_unittest(
+ cosf_float_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ cosf_float_test.cpp
+ LINK_LIBRARIES
+ -lpthread
+ DEPENDS
+ .exhaustive_test
+ libc.src.__support.math.sincosf_utils
+)
+
add_fp_unittest(
cospif_test
NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/cosf_float_test.cpp b/libc/test/src/math/exhaustive/cosf_float_test.cpp
new file mode 100644
index 0000000000000..0c3a98832c84d
--- /dev/null
+++ b/libc/test/src/math/exhaustive/cosf_float_test.cpp
@@ -0,0 +1,44 @@
+//===-- Exhaustive test for cosf - float-only -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/math/sincosf_float_eval.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float cosf_fast(float x) {
+ return LIBC_NAMESPACE::math::sincosf_float_eval::sincosf_eval<
+ /*IS_SIN*/ false>(x);
+}
+
+using LlvmLibcCosfExhaustiveTest =
+ LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Cos, cosf_fast,
+ 3>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, PostiveRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcCosfExhaustiveTest, NegativeRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/exhaustive/exhaustive_test.h b/libc/test/src/math/exhaustive/exhaustive_test.h
index 8b...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall looks good
Based on the double precision's sin/cos fast path algorithm:
Step 1: Perform range reduction
y = x mod pi/8
with target errors < 2^-54.This is because the worst case mod pi/8 for single precision is ~2^-31, so to have up to 1 ULP errors from
the range reduction, the targeted errors should
be 2^(-31 - 23) = 2^-54
.Step 2: Polynomial approximation
We use degree-5 and degree-4 polynomials to approximate sin and cos of the reduced angle respectively.
Step 3: Combine the results using trig identities
Overall errors: <= 3 ULPs for default rounding modes (tested exhaustively).
Current limitation: large range reduction requires FMA instructions for binary32. This restriction will be removed in the followup PR.