Skip to content

Commit 244b5fd

Browse files
lntuejph-13
authored andcommitted
[libc][math] Add skip accurate pass option for exp*, log*, and powf functions. (llvm#129831)
1 parent 7eb99f2 commit 244b5fd

File tree

13 files changed

+306
-62
lines changed

13 files changed

+306
-62
lines changed

libc/src/math/generic/common_constants.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ const double LOG_F[128] = {
112112
// precision, and -2^-8 <= v < 2^-7.
113113
// TODO(lntue): Add reference to how the constants are derived after the
114114
// resulting paper is ready.
115-
alignas(32) const float R[128] = {
115+
alignas(8) const float R[128] = {
116116
0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1,
117117
0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
118118
0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1,
@@ -133,7 +133,7 @@ alignas(32) const float R[128] = {
133133
0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1,
134134
0x1.02p-1, 0x1.0p-1};
135135

136-
alignas(64) const double RD[128] = {
136+
const double RD[128] = {
137137
0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1,
138138
0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
139139
0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1,
@@ -158,7 +158,7 @@ alignas(64) const double RD[128] = {
158158
// available.
159159
// Generated by Sollya with the formula: CD[i] = RD[i]*(1 + i*2^-7) - 1
160160
// for RD[i] defined on the table above.
161-
alignas(64) const double CD[128] = {
161+
const double CD[128] = {
162162
0.0, -0x1p-14, -0x1p-12, -0x1.2p-11, -0x1p-10, -0x1.9p-10,
163163
-0x1.2p-9, -0x1.88p-9, -0x1p-8, -0x1.9p-11, -0x1.fp-10, -0x1.9cp-9,
164164
-0x1p-12, -0x1.cp-10, -0x1.bp-9, -0x1.5p-11, -0x1.4p-9, 0x1p-14,
@@ -183,7 +183,7 @@ alignas(64) const double CD[128] = {
183183
-0x1p-14, -0x1p-8,
184184
};
185185

186-
alignas(64) const double LOG_R[128] = {
186+
const double LOG_R[128] = {
187187
0x0.0000000000000p0, 0x1.010157588de71p-7, 0x1.0205658935847p-6,
188188
0x1.8492528c8cabfp-6, 0x1.0415d89e74444p-5, 0x1.466aed42de3eap-5,
189189
0x1.894aa149fb343p-5, 0x1.ccb73cdddb2ccp-5, 0x1.08598b59e3a07p-4,
@@ -228,7 +228,7 @@ alignas(64) const double LOG_R[128] = {
228228
0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1,
229229
0x1.5ee82aa24192p-1, 0x0.000000000000p0};
230230

231-
alignas(64) const double LOG2_R[128] = {
231+
const double LOG2_R[128] = {
232232
0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
233233
0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
234234
0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
@@ -281,7 +281,7 @@ alignas(64) const double LOG2_R[128] = {
281281
// print("{", -c, ",", -b, "},");
282282
// };
283283
// We replace LOG_R[0] with log10(1.0) == 0.0
284-
alignas(64) const NumberPair<double> LOG_R_DD[128] = {
284+
alignas(16) const NumberPair<double> LOG_R_DD[128] = {
285285
{0.0, 0.0},
286286
{-0x1.0c76b999d2be8p-46, 0x1.010157589p-7},
287287
{-0x1.3dc5b06e2f7d2p-45, 0x1.0205658938p-6},
@@ -417,7 +417,7 @@ alignas(64) const NumberPair<double> LOG_R_DD[128] = {
417417
// Output range:
418418
// [-0x1.3ffcp-15, 0x1.3e3dp-15]
419419
// We store S2[i] = 2^16 (r(i - 2^6) - 1).
420-
alignas(64) const int S2[193] = {
420+
alignas(8) const int S2[193] = {
421421
0x101, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1,
422422
0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xbd,
423423
0xb9, 0xb4, 0xb0, 0xac, 0xa8, 0xa4, 0xa0, 0x9c, 0x98,
@@ -441,7 +441,7 @@ alignas(64) const int S2[193] = {
441441
-0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
442442
-0x1f0, -0x1f4, -0x1f8, -0x1fc};
443443

444-
alignas(64) const double R2[193] = {
444+
const double R2[193] = {
445445
0x1.0101p0, 0x1.00fdp0, 0x1.00f9p0, 0x1.00f5p0, 0x1.00f1p0,
446446
0x1.00edp0, 0x1.00e9p0, 0x1.00e5p0, 0x1.00e1p0, 0x1.00ddp0,
447447
0x1.00d9p0, 0x1.00d5p0, 0x1.00d1p0, 0x1.00cdp0, 0x1.00c9p0,
@@ -488,7 +488,7 @@ alignas(64) const double R2[193] = {
488488
// Output range:
489489
// [-0x1.01928p-22 , 0x1p-22]
490490
// We store S[i] = 2^21 (r(i - 80) - 1).
491-
alignas(64) const int S3[161] = {
491+
alignas(8) const int S3[161] = {
492492
0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46,
493493
0x45, 0x44, 0x43, 0x42, 0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b,
494494
0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
@@ -511,7 +511,7 @@ alignas(64) const int S3[161] = {
511511
// Output range:
512512
// [-0x1.0002143p-29 , 0x1p-29]
513513
// We store S[i] = 2^28 (r(i - 65) - 1).
514-
alignas(64) const int S4[130] = {
514+
alignas(8) const int S4[130] = {
515515
0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37,
516516
0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c,
517517
0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21,
@@ -661,7 +661,7 @@ const double EXP_M2[128] = {
661661
// d = round(a - b - c, D, RN);
662662
// print("{", d, ",", c, ",", b, "},");
663663
// };
664-
const fputil::TripleDouble EXP2_MID1[64] = {
664+
alignas(16) const fputil::TripleDouble EXP2_MID1[64] = {
665665
{0, 0, 0x1p0},
666666
{-0x1.9085b0a3d74d5p-110, -0x1.19083535b085dp-56, 0x1.02c9a3e778061p0},
667667
{0x1.05ff94f8d257ep-110, 0x1.d73e2a475b465p-55, 0x1.059b0d3158574p0},
@@ -739,7 +739,7 @@ const fputil::TripleDouble EXP2_MID1[64] = {
739739
// d = round(a - b - c, D, RN);
740740
// print("{", d, ",", c, ",", b, "},");
741741
// };
742-
const fputil::TripleDouble EXP2_MID2[64] = {
742+
alignas(16) const fputil::TripleDouble EXP2_MID2[64] = {
743743
{0, 0, 0x1p0},
744744
{0x1.39726694630e3p-108, 0x1.ae8e38c59c72ap-54, 0x1.000b175effdc7p0},
745745
{0x1.e5e06ddd31156p-112, -0x1.7b5d0d58ea8f4p-58, 0x1.00162f3904052p0},

libc/src/math/generic/exp.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,11 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
3939
// Error bounds:
4040
// Errors when using double precision.
4141
constexpr double ERR_D = 0x1.8p-63;
42+
43+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4244
// Errors when using double-double precision.
4345
constexpr double ERR_DD = 0x1.0p-99;
46+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4447

4548
// -2^-12 * log(2)
4649
// > a = -2^-12 * log(2);
@@ -50,8 +53,11 @@ constexpr double ERR_DD = 0x1.0p-99;
5053
// Errors < 1.5 * 2^-133
5154
constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
5255
constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
56+
57+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
5358
constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
5459
constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
60+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
5561

5662
namespace {
5763

@@ -72,6 +78,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
7278
return p;
7379
}
7480

81+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
7582
// Polynomial approximation with double-double precision:
7683
// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
7784
// For |dx| < 2^-13 + 2^-30:
@@ -171,6 +178,7 @@ DoubleDouble exp_double_double(double x, double kd,
171178

172179
return r;
173180
}
181+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
174182

175183
// Check for exceptional cases when
176184
// |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
@@ -373,6 +381,19 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
373381

374382
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
375383

384+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
385+
if (LIBC_UNLIKELY(denorm)) {
386+
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
387+
.value();
388+
} else {
389+
// to multiply by 2^hi, a fast way is to simply add hi to the exponent
390+
// field.
391+
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
392+
double r =
393+
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
394+
return r;
395+
}
396+
#else
376397
if (LIBC_UNLIKELY(denorm)) {
377398
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
378399
LIBC_LIKELY(r.has_value()))
@@ -413,6 +434,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
413434
Float128 r_f128 = exp_f128(x, kd, idx1, idx2);
414435

415436
return static_cast<double>(r_f128);
437+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
416438
}
417439

418440
} // namespace LIBC_NAMESPACE_DECL

libc/src/math/generic/exp10.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,20 @@ constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
4444
// Errors < 1.5 * 2^-144
4545
constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
4646
constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
47+
48+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4749
constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
4850
constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
51+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4952

5053
// Error bounds:
5154
// Errors when using double precision.
5255
constexpr double ERR_D = 0x1.8p-63;
5356

57+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
5458
// Errors when using double-double precision.
5559
constexpr double ERR_DD = 0x1.8p-99;
60+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
5661

5762
namespace {
5863

@@ -72,6 +77,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
7277
return p;
7378
}
7479

80+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
7581
// Polynomial approximation with double-double precision. Generated by Solya
7682
// with:
7783
// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
@@ -171,6 +177,7 @@ DoubleDouble exp10_double_double(double x, double kd,
171177

172178
return r;
173179
}
180+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
174181

175182
// When output is denormal.
176183
double exp10_denorm(double x) {
@@ -199,6 +206,10 @@ double exp10_denorm(double x) {
199206

200207
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
201208

209+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
210+
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
211+
.value();
212+
#else
202213
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
203214
LIBC_LIKELY(r.has_value()))
204215
return r.value();
@@ -214,6 +225,7 @@ double exp10_denorm(double x) {
214225
Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
215226

216227
return static_cast<double>(r_f128);
228+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
217229
}
218230

219231
// Check for exceptional cases when:
@@ -391,6 +403,12 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
391403

392404
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
393405

406+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
407+
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
408+
double r =
409+
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
410+
return r;
411+
#else
394412
double upper = exp_mid.hi + (lo + ERR_D);
395413
double lower = exp_mid.hi + (lo - ERR_D);
396414

@@ -473,6 +491,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
473491
Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
474492

475493
return static_cast<double>(r_f128);
494+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
476495
}
477496

478497
} // namespace LIBC_NAMESPACE_DECL

libc/src/math/generic/exp2.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@ constexpr double ERR_D = 0x1.0p-63;
4141
constexpr double ERR_D = 0x1.8p-63;
4242
#endif // LIBC_TARGET_CPU_HAS_FMA
4343

44+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4445
// Errors when using double-double precision.
4546
constexpr double ERR_DD = 0x1.0p-100;
47+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
4648

4749
namespace {
4850

@@ -62,6 +64,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
6264
return p;
6365
}
6466

67+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
6568
// Polynomial approximation with double-double precision. Generated by Solya
6669
// with:
6770
// > P = fpminimax((2^x - 1)/x, 5, [|DD...|], [-2^-13 - 2^-30, 2^-13 + 2^-30]);
@@ -147,6 +150,7 @@ DoubleDouble exp2_double_double(double x, const DoubleDouble &exp_mid) {
147150

148151
return r;
149152
}
153+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
150154

151155
// When output is denormal.
152156
double exp2_denorm(double x) {
@@ -174,6 +178,10 @@ double exp2_denorm(double x) {
174178

175179
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
176180

181+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
182+
return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
183+
.value();
184+
#else
177185
if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
178186
LIBC_LIKELY(r.has_value()))
179187
return r.value();
@@ -189,6 +197,7 @@ double exp2_denorm(double x) {
189197
Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);
190198

191199
return static_cast<double>(r_f128);
200+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
192201
}
193202

194203
// Check for exceptional cases when:
@@ -358,6 +367,14 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
358367

359368
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
360369

370+
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
371+
// To multiply by 2^hi, a fast way is to simply add hi to the exponent
372+
// field.
373+
int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
374+
double r =
375+
cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
376+
return r;
377+
#else
361378
double upper = exp_mid.hi + (lo + ERR_D);
362379
double lower = exp_mid.hi + (lo - ERR_D);
363380

@@ -387,6 +404,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
387404
Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);
388405

389406
return static_cast<double>(r_f128);
407+
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
390408
}
391409

392410
} // namespace LIBC_NAMESPACE_DECL

libc/src/math/generic/explogxf.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace LIBC_NAMESPACE_DECL {
1313

1414
// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
15-
alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
15+
alignas(8) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
1616
0x0.0000000000000p+0, 0x1.6e79685c2d22ap-6, 0x1.6bad3758efd87p-5,
1717
0x1.0eb389fa29f9bp-4, 0x1.663f6fac91316p-4, 0x1.bc84240adabbap-4,
1818
0x1.08c588cda79e4p-3, 0x1.32ae9e278ae1ap-3, 0x1.5c01a39fbd688p-3,
@@ -38,7 +38,7 @@ alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
3838
};
3939

4040
// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
41-
alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
41+
alignas(8) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
4242
0x1.0000000000000p+0, 0x1.f81f81f81f820p-1, 0x1.f07c1f07c1f08p-1,
4343
0x1.e9131abf0b767p-1, 0x1.e1e1e1e1e1e1ep-1, 0x1.dae6076b981dbp-1,
4444
0x1.d41d41d41d41dp-1, 0x1.cd85689039b0bp-1, 0x1.c71c71c71c71cp-1,
@@ -64,11 +64,11 @@ alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
6464

6565
// Taylos series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
6666
// K_LOG2_ODD starts from x^3
67-
alignas(64) const
67+
alignas(8) const
6868
double K_LOG2_ODD[4] = {0x1.ec709dc3a03fdp-2, 0x1.2776c50ef9bfep-2,
6969
0x1.a61762a7aded9p-3, 0x1.484b13d7c02a9p-3};
7070

71-
alignas(64) const
71+
alignas(8) const
7272
double K_LOG2_EVEN[4] = {-0x1.71547652b82fep-1, -0x1.71547652b82fep-2,
7373
-0x1.ec709dc3a03fdp-3, -0x1.2776c50ef9bfep-3};
7474

0 commit comments

Comments
 (0)