Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions libc/src/math/generic/common_constants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ const double LOG_F[128] = {
// precision, and -2^-8 <= v < 2^-7.
// TODO(lntue): Add reference to how the constants are derived after the
// resulting paper is ready.
alignas(32) const float R[128] = {
alignas(8) const float R[128] = {
0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1,
0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1,
Expand All @@ -133,7 +133,7 @@ alignas(32) const float R[128] = {
0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1,
0x1.02p-1, 0x1.0p-1};

alignas(64) const double RD[128] = {
const double RD[128] = {
0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1,
0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1,
Expand All @@ -158,7 +158,7 @@ alignas(64) const double RD[128] = {
// available.
// Generated by Sollya with the formula: CD[i] = RD[i]*(1 + i*2^-7) - 1
// for RD[i] defined on the table above.
alignas(64) const double CD[128] = {
const double CD[128] = {
0.0, -0x1p-14, -0x1p-12, -0x1.2p-11, -0x1p-10, -0x1.9p-10,
-0x1.2p-9, -0x1.88p-9, -0x1p-8, -0x1.9p-11, -0x1.fp-10, -0x1.9cp-9,
-0x1p-12, -0x1.cp-10, -0x1.bp-9, -0x1.5p-11, -0x1.4p-9, 0x1p-14,
Expand All @@ -183,7 +183,7 @@ alignas(64) const double CD[128] = {
-0x1p-14, -0x1p-8,
};

alignas(64) const double LOG_R[128] = {
const double LOG_R[128] = {
0x0.0000000000000p0, 0x1.010157588de71p-7, 0x1.0205658935847p-6,
0x1.8492528c8cabfp-6, 0x1.0415d89e74444p-5, 0x1.466aed42de3eap-5,
0x1.894aa149fb343p-5, 0x1.ccb73cdddb2ccp-5, 0x1.08598b59e3a07p-4,
Expand Down Expand Up @@ -228,7 +228,7 @@ alignas(64) const double LOG_R[128] = {
0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1,
0x1.5ee82aa24192p-1, 0x0.000000000000p0};

alignas(64) const double LOG2_R[128] = {
const double LOG2_R[128] = {
0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
Expand Down Expand Up @@ -281,7 +281,7 @@ alignas(64) const double LOG2_R[128] = {
// print("{", -c, ",", -b, "},");
// };
// We replace LOG_R[0] with log10(1.0) == 0.0
alignas(64) const NumberPair<double> LOG_R_DD[128] = {
alignas(16) const NumberPair<double> LOG_R_DD[128] = {
{0.0, 0.0},
{-0x1.0c76b999d2be8p-46, 0x1.010157589p-7},
{-0x1.3dc5b06e2f7d2p-45, 0x1.0205658938p-6},
Expand Down Expand Up @@ -417,7 +417,7 @@ alignas(64) const NumberPair<double> LOG_R_DD[128] = {
// Output range:
// [-0x1.3ffcp-15, 0x1.3e3dp-15]
// We store S2[i] = 2^16 (r(i - 2^6) - 1).
alignas(64) const int S2[193] = {
alignas(8) const int S2[193] = {
0x101, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1,
0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xbd,
0xb9, 0xb4, 0xb0, 0xac, 0xa8, 0xa4, 0xa0, 0x9c, 0x98,
Expand All @@ -441,7 +441,7 @@ alignas(64) const int S2[193] = {
-0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
-0x1f0, -0x1f4, -0x1f8, -0x1fc};

alignas(64) const double R2[193] = {
const double R2[193] = {
0x1.0101p0, 0x1.00fdp0, 0x1.00f9p0, 0x1.00f5p0, 0x1.00f1p0,
0x1.00edp0, 0x1.00e9p0, 0x1.00e5p0, 0x1.00e1p0, 0x1.00ddp0,
0x1.00d9p0, 0x1.00d5p0, 0x1.00d1p0, 0x1.00cdp0, 0x1.00c9p0,
Expand Down Expand Up @@ -488,7 +488,7 @@ alignas(64) const double R2[193] = {
// Output range:
// [-0x1.01928p-22 , 0x1p-22]
// We store S[i] = 2^21 (r(i - 80) - 1).
alignas(64) const int S3[161] = {
alignas(8) const int S3[161] = {
0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46,
0x45, 0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b,
0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
Expand All @@ -511,7 +511,7 @@ alignas(64) const int S3[161] = {
// Output range:
// [-0x1.0002143p-29 , 0x1p-29]
// We store S[i] = 2^28 (r(i - 65) - 1).
alignas(64) const int S4[130] = {
alignas(8) const int S4[130] = {
0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37,
0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c,
0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21,
Expand Down Expand Up @@ -661,7 +661,7 @@ const double EXP_M2[128] = {
// d = round(a - b - c, D, RN);
// print("{", d, ",", c, ",", b, "},");
// };
const fputil::TripleDouble EXP2_MID1[64] = {
alignas(16) const fputil::TripleDouble EXP2_MID1[64] = {
{0, 0, 0x1p0},
{-0x1.9085b0a3d74d5p-110, -0x1.19083535b085dp-56, 0x1.02c9a3e778061p0},
{0x1.05ff94f8d257ep-110, 0x1.d73e2a475b465p-55, 0x1.059b0d3158574p0},
Expand Down Expand Up @@ -739,7 +739,7 @@ const fputil::TripleDouble EXP2_MID1[64] = {
// d = round(a - b - c, D, RN);
// print("{", d, ",", c, ",", b, "},");
// };
const fputil::TripleDouble EXP2_MID2[64] = {
alignas(16) const fputil::TripleDouble EXP2_MID2[64] = {
{0, 0, 0x1p0},
{0x1.39726694630e3p-108, 0x1.ae8e38c59c72ap-54, 0x1.000b175effdc7p0},
{0x1.e5e06ddd31156p-112, -0x1.7b5d0d58ea8f4p-58, 0x1.00162f3904052p0},
Expand Down
22 changes: 22 additions & 0 deletions libc/src/math/generic/exp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
// Error bounds:
// Errors when using double precision.
constexpr double ERR_D = 0x1.8p-63;

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Errors when using double-double precision.
constexpr double ERR_DD = 0x1.0p-99;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// -2^-12 * log(2)
// > a = -2^-12 * log(2);
Expand All @@ -50,8 +53,11 @@ constexpr double ERR_DD = 0x1.0p-99;
// Errors < 1.5 * 2^-133
constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

namespace {

Expand All @@ -72,6 +78,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
return p;
}

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Polynomial approximation with double-double precision:
// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
// For |dx| < 2^-13 + 2^-30:
Expand Down Expand Up @@ -171,6 +178,7 @@ DoubleDouble exp_double_double(double x, double kd,

return r;
}
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// Check for exceptional cases when
// |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
Expand Down Expand Up @@ -373,6 +381,19 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {

double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (LIBC_UNLIKELY(denorm)) {
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
.value();
} else {
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r =
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
return r;
}
#else
if (LIBC_UNLIKELY(denorm)) {
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
LIBC_LIKELY(r.has_value()))
Expand Down Expand Up @@ -413,6 +434,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
Float128 r_f128 = exp_f128(x, kd, idx1, idx2);

return static_cast<double>(r_f128);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

} // namespace LIBC_NAMESPACE_DECL
19 changes: 19 additions & 0 deletions libc/src/math/generic/exp10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,20 @@ constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
// Errors < 1.5 * 2^-144
constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// Error bounds:
// Errors when using double precision.
constexpr double ERR_D = 0x1.8p-63;

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Errors when using double-double precision.
constexpr double ERR_DD = 0x1.8p-99;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

namespace {

Expand All @@ -72,6 +77,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
return p;
}

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Polynomial approximation with double-double precision. Generated by Solya
// with:
// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
Expand Down Expand Up @@ -171,6 +177,7 @@ DoubleDouble exp10_double_double(double x, double kd,

return r;
}
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// When output is denormal.
double exp10_denorm(double x) {
Expand Down Expand Up @@ -199,6 +206,10 @@ double exp10_denorm(double x) {

double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
.value();
#else
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
LIBC_LIKELY(r.has_value()))
return r.value();
Expand All @@ -214,6 +225,7 @@ double exp10_denorm(double x) {
Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);

return static_cast<double>(r_f128);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

// Check for exceptional cases when:
Expand Down Expand Up @@ -391,6 +403,12 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {

double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r =
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
return r;
#else
double upper = exp_mid.hi + (lo + ERR_D);
double lower = exp_mid.hi + (lo - ERR_D);

Expand Down Expand Up @@ -473,6 +491,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);

return static_cast<double>(r_f128);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

} // namespace LIBC_NAMESPACE_DECL
18 changes: 18 additions & 0 deletions libc/src/math/generic/exp2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ constexpr double ERR_D = 0x1.0p-63;
constexpr double ERR_D = 0x1.8p-63;
#endif // LIBC_TARGET_CPU_HAS_FMA

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Errors when using double-double precision.
constexpr double ERR_DD = 0x1.0p-100;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

namespace {

Expand All @@ -62,6 +64,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
return p;
}

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Polynomial approximation with double-double precision. Generated by Solya
// with:
// > P = fpminimax((2^x - 1)/x, 5, [|DD...|], [-2^-13 - 2^-30, 2^-13 + 2^-30]);
Expand Down Expand Up @@ -147,6 +150,7 @@ DoubleDouble exp2_double_double(double x, const DoubleDouble &exp_mid) {

return r;
}
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// When output is denormal.
double exp2_denorm(double x) {
Expand Down Expand Up @@ -174,6 +178,10 @@ double exp2_denorm(double x) {

double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
.value();
#else
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
LIBC_LIKELY(r.has_value()))
return r.value();
Expand All @@ -189,6 +197,7 @@ double exp2_denorm(double x) {
Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);

return static_cast<double>(r_f128);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

// Check for exceptional cases when:
Expand Down Expand Up @@ -358,6 +367,14 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {

double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
// field.
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
double r =
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
return r;
#else
double upper = exp_mid.hi + (lo + ERR_D);
double lower = exp_mid.hi + (lo - ERR_D);

Expand Down Expand Up @@ -387,6 +404,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);

return static_cast<double>(r_f128);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}

} // namespace LIBC_NAMESPACE_DECL
8 changes: 4 additions & 4 deletions libc/src/math/generic/explogxf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
namespace LIBC_NAMESPACE_DECL {

// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
alignas(8) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
0x0.0000000000000p+0, 0x1.6e79685c2d22ap-6, 0x1.6bad3758efd87p-5,
0x1.0eb389fa29f9bp-4, 0x1.663f6fac91316p-4, 0x1.bc84240adabbap-4,
0x1.08c588cda79e4p-3, 0x1.32ae9e278ae1ap-3, 0x1.5c01a39fbd688p-3,
Expand All @@ -38,7 +38,7 @@ alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
};

// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
alignas(8) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
0x1.0000000000000p+0, 0x1.f81f81f81f820p-1, 0x1.f07c1f07c1f08p-1,
0x1.e9131abf0b767p-1, 0x1.e1e1e1e1e1e1ep-1, 0x1.dae6076b981dbp-1,
0x1.d41d41d41d41dp-1, 0x1.cd85689039b0bp-1, 0x1.c71c71c71c71cp-1,
Expand All @@ -64,11 +64,11 @@ alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {

// Taylos series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
// K_LOG2_ODD starts from x^3
alignas(64) const
alignas(8) const
double K_LOG2_ODD[4] = {0x1.ec709dc3a03fdp-2, 0x1.2776c50ef9bfep-2,
0x1.a61762a7aded9p-3, 0x1.484b13d7c02a9p-3};

alignas(64) const
alignas(8) const
double K_LOG2_EVEN[4] = {-0x1.71547652b82fep-1, -0x1.71547652b82fep-2,
-0x1.ec709dc3a03fdp-3, -0x1.2776c50ef9bfep-3};

Expand Down
Loading
Loading