Skip to content

Conversation

@lntue
Copy link
Contributor

@lntue lntue commented Mar 5, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Mar 5, 2025

@llvm/pr-subscribers-libc

Author: None (lntue)

Changes

Patch is 36.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129831.diff

11 Files Affected:

  • (modified) libc/src/math/generic/common_constants.cpp (+12-12)
  • (modified) libc/src/math/generic/exp.cpp (+22)
  • (modified) libc/src/math/generic/exp10.cpp (+19)
  • (modified) libc/src/math/generic/exp2.cpp (+18)
  • (modified) libc/src/math/generic/explogxf.cpp (+4-4)
  • (modified) libc/src/math/generic/explogxf.h (+20-14)
  • (modified) libc/src/math/generic/log.cpp (+9-3)
  • (modified) libc/src/math/generic/log10.cpp (+9-3)
  • (modified) libc/src/math/generic/log1p.cpp (+24-16)
  • (modified) libc/src/math/generic/log2.cpp (+14-8)
  • (modified) libc/src/math/generic/powf.cpp (+153)
diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp
index e29c083fa474a..3088ef96e3b93 100644
--- a/libc/src/math/generic/common_constants.cpp
+++ b/libc/src/math/generic/common_constants.cpp
@@ -112,7 +112,7 @@ const double LOG_F[128] = {
 // precision, and -2^-8 <= v < 2^-7.
 // TODO(lntue): Add reference to how the constants are derived after the
 // resulting paper is ready.
-alignas(32) const float R[128] = {
+alignas(8) const float R[128] = {
     0x1p0,     0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1,  0x1.ecp-1, 0x1.e8p-1,
     0x1.e4p-1, 0x1.ep-1,  0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
     0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1,  0x1.bep-1, 0x1.bap-1,
@@ -133,7 +133,7 @@ alignas(32) const float R[128] = {
     0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1,
     0x1.02p-1, 0x1.0p-1};
 
-alignas(64) const double RD[128] = {
+const double RD[128] = {
     0x1p0,     0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1,  0x1.ecp-1, 0x1.e8p-1,
     0x1.e4p-1, 0x1.ep-1,  0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1,
     0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1,  0x1.bep-1, 0x1.bap-1,
@@ -158,7 +158,7 @@ alignas(64) const double RD[128] = {
 // available.
 // Generated by Sollya with the formula: CD[i] = RD[i]*(1 + i*2^-7) - 1
 // for RD[i] defined on the table above.
-alignas(64) const double CD[128] = {
+const double CD[128] = {
     0.0,        -0x1p-14,   -0x1p-12,   -0x1.2p-11,  -0x1p-10,   -0x1.9p-10,
     -0x1.2p-9,  -0x1.88p-9, -0x1p-8,    -0x1.9p-11,  -0x1.fp-10, -0x1.9cp-9,
     -0x1p-12,   -0x1.cp-10, -0x1.bp-9,  -0x1.5p-11,  -0x1.4p-9,  0x1p-14,
@@ -183,7 +183,7 @@ alignas(64) const double CD[128] = {
     -0x1p-14,   -0x1p-8,
 };
 
-alignas(64) const double LOG_R[128] = {
+const double LOG_R[128] = {
     0x0.0000000000000p0,  0x1.010157588de71p-7, 0x1.0205658935847p-6,
     0x1.8492528c8cabfp-6, 0x1.0415d89e74444p-5, 0x1.466aed42de3eap-5,
     0x1.894aa149fb343p-5, 0x1.ccb73cdddb2ccp-5, 0x1.08598b59e3a07p-4,
@@ -228,7 +228,7 @@ alignas(64) const double LOG_R[128] = {
     0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1,  0x1.5af405c3649ep-1,
     0x1.5ee82aa24192p-1,  0x0.000000000000p0};
 
-alignas(64) const double LOG2_R[128] = {
+const double LOG2_R[128] = {
     0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
     0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
     0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
@@ -281,7 +281,7 @@ alignas(64) const double LOG2_R[128] = {
 //     print("{", -c, ",", -b, "},");
 //   };
 // We replace LOG_R[0] with log10(1.0) == 0.0
-alignas(64) const NumberPair<double> LOG_R_DD[128] = {
+alignas(16) const NumberPair<double> LOG_R_DD[128] = {
     {0.0, 0.0},
     {-0x1.0c76b999d2be8p-46, 0x1.010157589p-7},
     {-0x1.3dc5b06e2f7d2p-45, 0x1.0205658938p-6},
@@ -417,7 +417,7 @@ alignas(64) const NumberPair<double> LOG_R_DD[128] = {
 // Output range:
 //   [-0x1.3ffcp-15, 0x1.3e3dp-15]
 // We store S2[i] = 2^16 (r(i - 2^6) - 1).
-alignas(64) const int S2[193] = {
+alignas(8) const int S2[193] = {
     0x101,  0xfd,   0xf9,   0xf5,   0xf1,   0xed,   0xe9,   0xe5,   0xe1,
     0xdd,   0xd9,   0xd5,   0xd1,   0xcd,   0xc9,   0xc5,   0xc1,   0xbd,
     0xb9,   0xb4,   0xb0,   0xac,   0xa8,   0xa4,   0xa0,   0x9c,   0x98,
@@ -441,7 +441,7 @@ alignas(64) const int S2[193] = {
     -0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
     -0x1f0, -0x1f4, -0x1f8, -0x1fc};
 
-alignas(64) const double R2[193] = {
+const double R2[193] = {
     0x1.0101p0,  0x1.00fdp0,  0x1.00f9p0,  0x1.00f5p0,  0x1.00f1p0,
     0x1.00edp0,  0x1.00e9p0,  0x1.00e5p0,  0x1.00e1p0,  0x1.00ddp0,
     0x1.00d9p0,  0x1.00d5p0,  0x1.00d1p0,  0x1.00cdp0,  0x1.00c9p0,
@@ -488,7 +488,7 @@ alignas(64) const double R2[193] = {
 // Output range:
 //   [-0x1.01928p-22 , 0x1p-22]
 // We store S[i] = 2^21 (r(i - 80) - 1).
-alignas(64) const int S3[161] = {
+alignas(8) const int S3[161] = {
     0x50,  0x4f,  0x4e,  0x4d,  0x4c,  0x4b,  0x4a,  0x49,  0x48,  0x47,  0x46,
     0x45,  0x44,  0x43,  0x42,  0x41,  0x40,  0x3f,  0x3e,  0x3d,  0x3c,  0x3b,
     0x3a,  0x39,  0x38,  0x37,  0x36,  0x35,  0x34,  0x33,  0x32,  0x31,  0x30,
@@ -511,7 +511,7 @@ alignas(64) const int S3[161] = {
 // Output range:
 //   [-0x1.0002143p-29 , 0x1p-29]
 // We store S[i] = 2^28 (r(i - 65) - 1).
-alignas(64) const int S4[130] = {
+alignas(8) const int S4[130] = {
     0x41,  0x40,  0x3f,  0x3e,  0x3d,  0x3c,  0x3b,  0x3a,  0x39,  0x38,  0x37,
     0x36,  0x35,  0x34,  0x33,  0x32,  0x31,  0x30,  0x2f,  0x2e,  0x2d,  0x2c,
     0x2b,  0x2a,  0x29,  0x28,  0x27,  0x26,  0x25,  0x24,  0x23,  0x22,  0x21,
@@ -661,7 +661,7 @@ const double EXP_M2[128] = {
 //     d = round(a - b - c, D, RN);
 //     print("{", d, ",", c, ",", b, "},");
 //   };
-const fputil::TripleDouble EXP2_MID1[64] = {
+alignas(16) const fputil::TripleDouble EXP2_MID1[64] = {
     {0, 0, 0x1p0},
     {-0x1.9085b0a3d74d5p-110, -0x1.19083535b085dp-56, 0x1.02c9a3e778061p0},
     {0x1.05ff94f8d257ep-110, 0x1.d73e2a475b465p-55, 0x1.059b0d3158574p0},
@@ -739,7 +739,7 @@ const fputil::TripleDouble EXP2_MID1[64] = {
 //     d = round(a - b - c, D, RN);
 //     print("{", d, ",", c, ",", b, "},");
 //   };
-const fputil::TripleDouble EXP2_MID2[64] = {
+alignas(16) const fputil::TripleDouble EXP2_MID2[64] = {
     {0, 0, 0x1p0},
     {0x1.39726694630e3p-108, 0x1.ae8e38c59c72ap-54, 0x1.000b175effdc7p0},
     {0x1.e5e06ddd31156p-112, -0x1.7b5d0d58ea8f4p-58, 0x1.00162f3904052p0},
diff --git a/libc/src/math/generic/exp.cpp b/libc/src/math/generic/exp.cpp
index 38b683aa01166..143800ca078a6 100644
--- a/libc/src/math/generic/exp.cpp
+++ b/libc/src/math/generic/exp.cpp
@@ -39,8 +39,11 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
 // Error bounds:
 // Errors when using double precision.
 constexpr double ERR_D = 0x1.8p-63;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Errors when using double-double precision.
 constexpr double ERR_DD = 0x1.0p-99;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // -2^-12 * log(2)
 // > a = -2^-12 * log(2);
@@ -50,8 +53,11 @@ constexpr double ERR_DD = 0x1.0p-99;
 // Errors < 1.5 * 2^-133
 constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
 constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
 constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 namespace {
 
@@ -72,6 +78,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
   return p;
 }
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Polynomial approximation with double-double precision:
 // Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
 // For |dx| < 2^-13 + 2^-30:
@@ -171,6 +178,7 @@ DoubleDouble exp_double_double(double x, double kd,
 
   return r;
 }
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // Check for exceptional cases when
 // |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
@@ -373,6 +381,19 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
 
   double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  if (LIBC_UNLIKELY(denorm)) {
+    return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
+        .value();
+  } else {
+    // to multiply by 2^hi, a fast way is to simply add hi to the exponent
+    // field.
+    int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+    double r =
+        cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
+    return r;
+  }
+#else
   if (LIBC_UNLIKELY(denorm)) {
     if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
         LIBC_LIKELY(r.has_value()))
@@ -413,6 +434,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
   Float128 r_f128 = exp_f128(x, kd, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/exp10.cpp b/libc/src/math/generic/exp10.cpp
index 748c8a22b2368..c464979b092c3 100644
--- a/libc/src/math/generic/exp10.cpp
+++ b/libc/src/math/generic/exp10.cpp
@@ -44,15 +44,20 @@ constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
 // Errors < 1.5 * 2^-144
 constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
 constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
 constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // Error bounds:
 // Errors when using double precision.
 constexpr double ERR_D = 0x1.8p-63;
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Errors when using double-double precision.
 constexpr double ERR_DD = 0x1.8p-99;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 namespace {
 
@@ -72,6 +77,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
   return p;
 }
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Polynomial approximation with double-double precision.  Generated by Solya
 // with:
 // > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
@@ -171,6 +177,7 @@ DoubleDouble exp10_double_double(double x, double kd,
 
   return r;
 }
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // When output is denormal.
 double exp10_denorm(double x) {
@@ -199,6 +206,10 @@ double exp10_denorm(double x) {
 
   double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
+      .value();
+#else
   if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
       LIBC_LIKELY(r.has_value()))
     return r.value();
@@ -214,6 +225,7 @@ double exp10_denorm(double x) {
   Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 // Check for exceptional cases when:
@@ -391,6 +403,12 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
 
   double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+  double r =
+      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
+  return r;
+#else
   double upper = exp_mid.hi + (lo + ERR_D);
   double lower = exp_mid.hi + (lo - ERR_D);
 
@@ -473,6 +491,7 @@ LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
   Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp
index 935548b538b94..2c612777c9cb5 100644
--- a/libc/src/math/generic/exp2.cpp
+++ b/libc/src/math/generic/exp2.cpp
@@ -41,8 +41,10 @@ constexpr double ERR_D = 0x1.0p-63;
 constexpr double ERR_D = 0x1.8p-63;
 #endif // LIBC_TARGET_CPU_HAS_FMA
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Errors when using double-double precision.
 constexpr double ERR_DD = 0x1.0p-100;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 namespace {
 
@@ -62,6 +64,7 @@ LIBC_INLINE double poly_approx_d(double dx) {
   return p;
 }
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // Polynomial approximation with double-double precision.  Generated by Solya
 // with:
 // > P = fpminimax((2^x - 1)/x, 5, [|DD...|], [-2^-13 - 2^-30, 2^-13 + 2^-30]);
@@ -147,6 +150,7 @@ DoubleDouble exp2_double_double(double x, const DoubleDouble &exp_mid) {
 
   return r;
 }
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 // When output is denormal.
 double exp2_denorm(double x) {
@@ -174,6 +178,10 @@ double exp2_denorm(double x) {
 
   double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
+      .value();
+#else
   if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
       LIBC_LIKELY(r.has_value()))
     return r.value();
@@ -189,6 +197,7 @@ double exp2_denorm(double x) {
   Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 // Check for exceptional cases when:
@@ -358,6 +367,14 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
 
   double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // To multiply by 2^hi, a fast way is to simply add hi to the exponent
+  // field.
+  int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+  double r =
+      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
+  return r;
+#else
   double upper = exp_mid.hi + (lo + ERR_D);
   double lower = exp_mid.hi + (lo - ERR_D);
 
@@ -387,6 +404,7 @@ LLVM_LIBC_FUNCTION(double, exp2, (double x)) {
   Float128 r_f128 = exp2_f128(dx, hi, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/explogxf.cpp b/libc/src/math/generic/explogxf.cpp
index 9e945eca7aed6..d38efa0269693 100644
--- a/libc/src/math/generic/explogxf.cpp
+++ b/libc/src/math/generic/explogxf.cpp
@@ -12,7 +12,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 // N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
-alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
+alignas(8) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
     0x0.0000000000000p+0, 0x1.6e79685c2d22ap-6, 0x1.6bad3758efd87p-5,
     0x1.0eb389fa29f9bp-4, 0x1.663f6fac91316p-4, 0x1.bc84240adabbap-4,
     0x1.08c588cda79e4p-3, 0x1.32ae9e278ae1ap-3, 0x1.5c01a39fbd688p-3,
@@ -38,7 +38,7 @@ alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = {
 };
 
 // N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
-alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
+alignas(8) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
     0x1.0000000000000p+0, 0x1.f81f81f81f820p-1, 0x1.f07c1f07c1f08p-1,
     0x1.e9131abf0b767p-1, 0x1.e1e1e1e1e1e1ep-1, 0x1.dae6076b981dbp-1,
     0x1.d41d41d41d41dp-1, 0x1.cd85689039b0bp-1, 0x1.c71c71c71c71cp-1,
@@ -64,11 +64,11 @@ alignas(64) const double LOG_P1_1_OVER[LOG_P1_SIZE] = {
 
 // Taylos series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
 // K_LOG2_ODD starts from x^3
-alignas(64) const
+alignas(8) const
     double K_LOG2_ODD[4] = {0x1.ec709dc3a03fdp-2, 0x1.2776c50ef9bfep-2,
                             0x1.a61762a7aded9p-3, 0x1.484b13d7c02a9p-3};
 
-alignas(64) const
+alignas(8) const
     double K_LOG2_EVEN[4] = {-0x1.71547652b82fep-1, -0x1.71547652b82fep-2,
                              -0x1.ec709dc3a03fdp-3, -0x1.2776c50ef9bfep-3};
 
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h
index 651524a165f03..e79aa13eb57f7 100644
--- a/libc/src/math/generic/explogxf.h
+++ b/libc/src/math/generic/explogxf.h
@@ -338,8 +338,9 @@ LIBC_INLINE static double log_eval(double x) {
 //   double(2^-1022 + x) - 2^-1022 = double(x).
 // So if we scale x up by 2^1022, we can use
 //   double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
-LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
-                                                  double err) {
+template <bool SKIP_ZIV_TEST = false>
+LIBC_INLINE static cpp::optional<double>
+ziv_test_denorm(int hi, double mid, double lo, double err) {
   using FPBits = typename fputil::FPBits<double>;
 
   // Scaling factor = 1/(min normal number) = 2^1022
@@ -360,22 +361,27 @@ LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
     scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field.
   }
 
-  double err_scaled =
-      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err));
-
-  double lo_u = lo_scaled + err_scaled;
-  double lo_l = lo_scaled - err_scaled;
-
   // By adding 1.0, the results will have similar rounding points as denormal
   // outputs.
-  double upper = extra_factor + (mid_hi + lo_u);
-  double lower = extra_factor + (mid_hi + lo_l);
+  if constexpr (SKIP_ZIV_TEST) {
+    double r = extra_factor + (mid_hi + lo_scaled);
+    return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(r) - scale_down);
+  } else {
+    double err_scaled =
+        cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err));
 
-  if (LIBC_LIKELY(upper == lower)) {
-    return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
-  }
+    double lo_u = lo_scaled + err_scaled;
+    double lo_l = lo_scaled - err_scaled;
 
-  return cpp::nullopt;
+    double upper = extra_factor + (mid_hi + lo_u);
+    double lower = extra_factor + (mid_hi + lo_l);
+
+    if (LIBC_LIKELY(upper == lower)) {
+      return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
+    }
+
+    return cpp::nullopt;
+  }
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp
index 4302c64c8abac..04eebab975cd5 100644
--- a/libc/src/math/generic/log.cpp
+++ b/libc/src/math/generic/log.cpp
@@ -30,6 +30,7 @@ using LIBC_NAMESPACE::operator""_u128;
 
 namespace {
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // A simple upper bound for the error of e_x * log(2) - log(r).
 constexpr double HI_ERR = 0x1.0p-85;
 
@@ -47,7 +48,7 @@ constexpr double P_ERR = 0x1.0p-50;
 constexpr Float128 LOG_2(Sign::POS, /*exponent=*/-128, /*mantissa=*/
                          0xb17217f7'd1cf79ab'c9e3b398'03f2f6af_u128);
 
-alignas(64) constexpr LogRR LOG_TABLE = {
+alignas(16) constexpr LogRR LOG_TABLE = {
     // -log(r) with 128-bit precision generated by SageMath with:
     // for i in range(128):
     //   r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^(-7)) );
@@ -731,6 +732,7 @@ double log_accurate(int e_x, int index, double m_x) {
 
   return static_cast<double>(r);
 }
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 } // namespace
 
@@ -794,7 +796,7 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
   uint64_t x_m = (x_u & 0x000F'FFFF'FFFF'FFFFULL) | 0x3FF0'0000'0000'0000ULL;
   double m = FPBits_t(x_m).get_val();
 
-  double u, u_sq, err;
+  double u, u_sq;
   fputil::DoubleDouble r1;
 
   // Perform exact range reduction
@@ -819,12 +821,15 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
   double p2 = fputil::multiply_add(u, LOG_COEFFS[5], LOG_COEFFS[4]);
   double p = fputil::polyeval(u_sq, lo + r1.lo, p0, p1, p2);
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return r1.hi + p;
+#else
   // Technicallly error of r1.lo is bounded by:
   //    hi*ulp(log(2)_lo) + C*ulp(u^2)
   // To simplify the error computation a bit, we replace |hi|*ulp(log(2)_lo)
   // with the upper bound: 2^11 * ulp(log(2)_lo) = 2^-85.
   // Total error is bounded by ~ C * ulp(u^2) + 2^-85.
-  err = fputil::multiply_add(u_sq, P_ERR, HI_ERR);
+  double err = fputil::multiply_add(u_sq, P_ERR, HI_ERR);
 
   // Lower bound from the result
   double left = r1.hi + (p - err);
@@ -836,6 +841,7 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
     return left;
 
   return log_accurate(x_e, index, u);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp
index 7df57ef85b81b..fd8d5a8aae938 100644
--- a/libc/src/math/generic/log10.cpp
+++ b/libc/src/math/generic/log10.cpp
@@ -33,6 +33,7 @@ namespace {
 constexpr fputil::DoubleDouble LOG10_E = {0x1.95355baaafad3p-57,
                                           0x1.bcb7b1526e50ep-2};
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 // A simple upper bound for the error of e_x * log(2) - log(r).
 constexpr double HI_ERR = 0x1.0p-85;
 
@@ -50,7 +51,7 @@ constexpr double P_ERR = 0x1.0p-51;
 constexpr Float128 LOG10_2(Sign::POS, /*exponent=*/-129, /*mantissa=*/
                            0x9a209a84'fbcff798'8f8959ac'0b7c9178_u128);
 
-alignas(64) constexpr LogRR LOG10_TABLE = {
+alignas(16) constexpr LogRR LOG10_TABLE = {
     // -log10(r) with 128-bit precision generated by SageMath with:
     //
     // for i in range(128):
@@ -733,6 +734,7 @@ double log10_accurate(int e_x, int index, double m_x) {
 
   return static_cast<double>(r);
 }
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 
 } // namespace
 
@@ -796,7 +798,7 @@ LLVM_LIBC_FUNCTION(double,...
[truncated]

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overall LGTM, do you want to split the alignas changes to a separate patch? They seem unrelated.

Copy link
Member

@petrhosek petrhosek left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still testing the PR locally to see the impact on size, but the PR itself looks good to me so feel free to merge it.

@lntue lntue merged commit f2bebdc into llvm:main Mar 6, 2025
16 checks passed
@lntue lntue deleted the small branch March 6, 2025 00:18
@lntue
Copy link
Contributor Author

lntue commented Mar 6, 2025

Overall LGTM, do you want to split the alignas changes to a separate patch? They seem unrelated.

Fixing the alignment of those constant tables of exp and log does bring down the size a bit, so I'd keep it in this PR.

@petrhosek
Copy link
Member

I got the numbers for Pico SDK math tests:

Before After
pico_double_test 101K 69K
pico_float_test 69K 56K

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants