Skip to content

[libc][math] Refactor atan implementation to header-only in src/__support/math folder. #150852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/shared/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "math/asinf16.h"
#include "math/asinhf.h"
#include "math/asinhf16.h"
#include "math/atan.h"
#include "math/erff.h"
#include "math/exp.h"
#include "math/exp10.h"
Expand Down
23 changes: 23 additions & 0 deletions libc/shared/math/atan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Shared atan function ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_MATH_ATAN_H
#define LLVM_LIBC_SHARED_MATH_ATAN_H

#include "shared/libc_common.h"
#include "src/__support/math/atan.h"

namespace LIBC_NAMESPACE_DECL {
namespace shared {

using math::atan;

} // namespace shared
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SHARED_MATH_ATAN_H
27 changes: 27 additions & 0 deletions libc/src/__support/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,33 @@ DEPENDS
libc.src.__support.macros.optimization
)

add_header_library(
atan_utils
HDRS
atan_utils.h
DEPENDS
libc.src.__support.integer_literals
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.macros.optimization
)

add_header_library(
atan
HDRS
atan.h
DEPENDS
.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.macros.optimization
)

add_header_library(
asinf
HDRS
Expand Down
189 changes: 189 additions & 0 deletions libc/src/__support/math/atan.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
//===-- Implementation header for atan --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H

#include "atan_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE_DECL {

namespace math {

// To compute atan(x), we divided it into the following cases:
// * |x| < 2^-26:
// Since |x| > atan(|x|) > |x| - |x|^3/3, and |x|^3/3 < ulp(x)/2, we simply
// return atan(x) = x - sign(x) * epsilon.
// * 2^-26 <= |x| < 1:
// We perform range reduction mod 2^-6 = 1/64 as follow:
// Let k = 2^(-6) * round(|x| * 2^6), then
// atan(x) = sign(x) * atan(|x|)
// = sign(x) * (atan(k) + atan((|x| - k) / (1 + |x|*k)).
// We store atan(k) in a look up table, and perform intermediate steps in
// double-double.
// * 1 < |x| < 2^53:
// First we perform the transformation y = 1/|x|:
// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
// = sign(x) * (pi/2 - atan(y)).
// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
// previous case:
// Let k = 2^(-6) * round(y * 2^6), then
// atan(y) = atan(k) + atan((y - k) / (1 + y*k))
// = atan(k) + atan((1/|x| - k) / (1 + k/|x|)
// = atan(k) + atan((1 - k*|x|) / (|x| + k)).
// * |x| >= 2^53:
// Using the reciprocal transformation:
// atan(x) = sign(x) * (pi/2 - atan(1/|x|)).
// We have that:
// atan(1/|x|) <= 1/|x| <= 2^-53,
// which is smaller than ulp(pi/2) / 2.
// So we can return:
// atan(x) = sign(x) * (pi/2 - epsilon)

LIBC_INLINE static constexpr double atan(double x) {

using namespace atan_internal;
using FPBits = fputil::FPBits<double>;

constexpr double IS_NEG[2] = {1.0, -1.0};
constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
0x1.921fb54442d18p0};
constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
-0x1.921fb54442d18p0};

FPBits xbits(x);
bool x_sign = xbits.is_neg();
xbits = xbits.abs();
uint64_t x_abs = xbits.uintval();
int x_exp =
static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;

// |x| < 1.
if (x_exp < 0) {
if (LIBC_UNLIKELY(x_exp < -26)) {
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return x;
#else
if (x == 0.0)
return x;
// |x| < 2^-26
return fputil::multiply_add(-0x1.0p-54, x, x);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

double x_d = xbits.get_val();
// k = 2^-6 * round(2^6 * |x|)
double k = fputil::nearest_integer(0x1.0p6 * x_d);
unsigned idx = static_cast<unsigned>(k);
k *= 0x1.0p-6;

// numerator = |x| - k
DoubleDouble num, den;
num.lo = 0.0;
num.hi = x_d - k;

// denominator = 1 - k * |x|
den.hi = fputil::multiply_add(x_d, k, 1.0);
DoubleDouble prod = fputil::exact_mult(x_d, k);
// Using Dekker's 2SUM algorithm to compute the lower part.
den.lo = ((1.0 - den.hi) + prod.hi) + prod.lo;

// x_r = (|x| - k) / (1 + k * |x|)
DoubleDouble x_r = fputil::div(num, den);

// Approximating atan(x_r) using Taylor polynomial.
DoubleDouble p = atan_eval(x_r);

// atan(x) = sign(x) * (atan(k) + atan(x_r))
// = sign(x) * (atan(k) + atan( (|x| - k) / (1 + k * |x|) ))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
#else

DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
double r = IS_NEG[x_sign] * (c0.hi + c1);

return r;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

// |x| >= 2^53 or x is NaN.
if (LIBC_UNLIKELY(x_exp >= 53)) {
// x is nan
if (xbits.is_nan()) {
if (xbits.is_signaling_nan()) {
fputil::raise_except_if_required(FE_INVALID);
return FPBits::quiet_nan().get_val();
}
return x;
}
// |x| >= 2^53
// atan(x) ~ sign(x) * pi/2.
if (x_exp >= 53)
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return IS_NEG[x_sign] * PI_OVER_2.hi;
#else
return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
IS_NEG[x_sign] * PI_OVER_2.lo);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

double x_d = xbits.get_val();
double y = 1.0 / x_d;

// k = 2^-6 * round(2^6 / |x|)
double k = fputil::nearest_integer(0x1.0p6 * y);
unsigned idx = static_cast<unsigned>(k);
k *= 0x1.0p-6;

// denominator = |x| + k
DoubleDouble den = fputil::exact_add(x_d, k);
// numerator = 1 - k * |x|
DoubleDouble num;
num.hi = fputil::multiply_add(-x_d, k, 1.0);
DoubleDouble prod = fputil::exact_mult(x_d, k);
// Using Dekker's 2SUM algorithm to compute the lower part.
num.lo = ((1.0 - num.hi) - prod.hi) - prod.lo;

// x_r = (1/|x| - k) / (1 - k/|x|)
// = (1 - k * |x|) / (|x| - k)
DoubleDouble x_r = fputil::div(num, den);

// Approximating atan(x_r) using Taylor polynomial.
DoubleDouble p = atan_eval(x_r);

// atan(x) = sign(x) * (pi/2 - atan(1/|x|))
// = sign(x) * (pi/2 - atan(k) - atan(x_r))
// = (-sign(x)) * (-pi/2 + atan(k) + atan((1 - k*|x|)/(|x| - k)))
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
#else
DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);

double r = IS_NEG[!x_sign] * (c1.hi + c2);

return r;
#endif
}

} // namespace math

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN_H
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

namespace LIBC_NAMESPACE_DECL {

namespace {
namespace atan_internal {

using DoubleDouble = fputil::DoubleDouble;
using Float128 = fputil::DyadicFloat<128>;
Expand All @@ -29,7 +29,7 @@ using Float128 = fputil::DyadicFloat<128>;
// b = round(atan(i/64) - a, D, RN);
// print("{", b, ",", a, "},");
// };
constexpr DoubleDouble ATAN_I[65] = {
static constexpr DoubleDouble ATAN_I[65] = {
{0.0, 0.0},
{-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
{-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
Expand Down Expand Up @@ -110,7 +110,8 @@ constexpr DoubleDouble ATAN_I[65] = {
// + x_lo * (1 - x_hi^2 + x_hi^4)
// Since p.lo is ~ x^3/3, the relative error from rounding is bounded by:
// |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66.
[[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) {
[[maybe_unused]] LIBC_INLINE static DoubleDouble
atan_eval(const DoubleDouble &x) {
DoubleDouble p;
p.hi = x.hi;
double x_hi_sq = x.hi * x.hi;
Expand Down Expand Up @@ -142,7 +143,7 @@ constexpr DoubleDouble ATAN_I[65] = {
// b = 2^ll + a;
// print("{Sign::POS, ", 2^(ll - 128), ",", b, "},");
// };
constexpr Float128 ATAN_I_F128[65] = {
static constexpr Float128 ATAN_I_F128[65] = {
{Sign::POS, 0, 0_u128},
{Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128},
{Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128},
Expand Down Expand Up @@ -215,7 +216,7 @@ constexpr Float128 ATAN_I_F128[65] = {
// [0, 2^-7]);
// > dirtyinfnorm(atan(x) - P, [0, 2^-7]);
// 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122
constexpr Float128 ATAN_POLY_F128[] = {
static constexpr Float128 ATAN_POLY_F128[] = {
{Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128},
{Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128},
{Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128},
Expand All @@ -225,7 +226,8 @@ constexpr Float128 ATAN_POLY_F128[] = {
};

// Approximate atan for |x| <= 2^-7.
[[maybe_unused]] Float128 atan_eval(const Float128 &x) {
[[maybe_unused]] LIBC_INLINE static constexpr Float128
atan_eval(const Float128 &x) {
Float128 x_sq = fputil::quick_mul(x, x);
Float128 x3 = fputil::quick_mul(x, x_sq);
Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1],
Expand All @@ -234,7 +236,7 @@ constexpr Float128 ATAN_POLY_F128[] = {
return fputil::multiply_add(x3, p, x);
}

} // anonymous namespace
} // namespace atan_internal

} // namespace LIBC_NAMESPACE_DECL

Expand Down
25 changes: 3 additions & 22 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4020,19 +4020,6 @@ add_entrypoint_object(
libc.src.errno.errno
)

add_header_library(
atan_utils
HDRS
atan_utils.h
DEPENDS
libc.src.__support.integer_literals
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.macros.optimization
)

add_entrypoint_object(
atanf
SRCS
Expand Down Expand Up @@ -4079,13 +4066,7 @@ add_entrypoint_object(
COMPILE_OPTIONS
-O3
DEPENDS
.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.macros.optimization
libc.src.__support.math.atan
)

add_entrypoint_object(
Expand Down Expand Up @@ -4115,7 +4096,7 @@ add_entrypoint_object(
HDRS
../atan2.h
DEPENDS
.atan_utils
libc.src.__support.math.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
Expand All @@ -4141,7 +4122,7 @@ add_entrypoint_object(
HDRS
../atan2f128.h
DEPENDS
.atan_utils
libc.src.__support.math.atan_utils
libc.src.__support.integer_literals
libc.src.__support.uint128
libc.src.__support.FPUtil.dyadic_float
Expand Down
Loading
Loading