Address review comments: replace the NO_FMA bool template flag with an explicit split-size template parameter.

lntue · lntue · commit cbced2bad726 · 2024-10-10T19:12:53.000Z
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
@@ -18,6 +18,8 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace fputil {
 
+#define DEFAULT_DOUBLE_SPLIT 27
+
 using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
 
 // The output of Dekker's FastTwoSum algorithm is correct, i.e.:
@@ -61,7 +63,8 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
 //   Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
 //   Roundings," https://inria.hal.science/hal-04480440.
 // Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
-template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
+template <size_t N = DEFAULT_DOUBLE_SPLIT>
+LIBC_INLINE constexpr DoubleDouble split(double a) {
   DoubleDouble r{0.0, 0.0};
   // CN = 2^N.
   constexpr double CN = static_cast<double>(1 << N);
@@ -73,16 +76,12 @@ template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
   return r;
 }
 
-// Helper for non-fma exact mult where the first number is already splitted.
-template <bool NO_FMA_ALL_ROUNDINGS = false>
+// Helper for non-fma exact mult where the first number is already split.
+template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT>
 LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
                                     double b) {
-  DoubleDouble bs, r;
-
-  if constexpr (NO_FMA_ALL_ROUNDINGS)
-    bs = split<28>(b);
-  else
-    bs = split(b);
+  DoubleDouble bs = split<SPLIT_B>(b);
+  DoubleDouble r{0.0, 0.0};
 
   r.hi = a * b;
   double t1 = as.hi * bs.hi - r.hi;
@@ -100,7 +99,7 @@ LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
 // Using Theorem 1 in the paper above, without FMA instruction, if we restrict
 // the generated constants to precision <= 51, and splitting it by 2^28 + 1,
 // then a * b = r.hi + r.lo is exact for all rounding modes.
-template <bool NO_FMA_ALL_ROUNDINGS = false>
+template <size_t SPLIT_B = 27>
 LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
   DoubleDouble r{0.0, 0.0};
 
@@ -111,7 +110,7 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
   // Dekker's Product.
   DoubleDouble as = split(a);
 
-  r = exact_mult<NO_FMA_ALL_ROUNDINGS>(as, a, b);
+  r = exact_mult<SPLIT_B>(as, a, b);
 #endif // LIBC_TARGET_CPU_HAS_FMA
 
   return r;
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
@@ -50,11 +50,11 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
 #define LIBC_MATH 0
 #else
 
-#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
+#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
 #define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 #endif
 
-#if ((LIBC_MATH & LIBC_MATH_SMALL_TABLES) != 0)
+#if (LIBC_MATH & LIBC_MATH_SMALL_TABLES)
 #define LIBC_MATH_HAS_SMALL_TABLES
 #endif
 
diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h
@@ -22,9 +22,12 @@
 namespace LIBC_NAMESPACE_DECL {
 
 #ifdef LIBC_TARGET_CPU_HAS_FMA
-static constexpr bool NO_FMA = false;
+static constexpr unsigned SPLIT = DEFAULT_DOUBLE_SPLIT;
 #else
-static constexpr bool NO_FMA = true;
+// When there are no FMA instructions, to have an exact product of 2 double
+// precision numbers with directional roundings, we need to lower the precision
+// of the constants by at least 1 bit, and use a different splitting constant.
+static constexpr unsigned SPLIT = 28;
 #endif // LIBC_TARGET_CPU_HAS_FMA
 
 using LIBC_NAMESPACE::fputil::DoubleDouble;
diff --git a/libc/src/math/generic/range_reduction_double_fma.h b/libc/src/math/generic/range_reduction_double_fma.h
@@ -34,13 +34,13 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
   x_reduced = xbits.get_val();
   // x * c_hi = ph.hi + ph.lo exactly.
   DoubleDouble ph =
-      fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
+      fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
   // x * c_mid = pm.hi + pm.lo exactly.
   DoubleDouble pm =
-      fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
+      fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
   // x * c_lo = pl.hi + pl.lo exactly.
   DoubleDouble pl =
-      fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
+      fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
   // Extract integral parts and fractional parts of (ph.lo + pm.hi).
   double sum_hi = ph.lo + pm.hi;
   double kd = fputil::nearest_integer(sum_hi);
@@ -67,7 +67,7 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
   // Then,
   //   | {x * 128/pi} - (y_hi + y_lo) | <=  ulp(ulp(y_hi)) <= 2^-105
   //   | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110
-  u = fputil::quick_mult<NO_FMA>(y, PI_OVER_128_DD);
+  u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD);
 
   return static_cast<unsigned>(static_cast<int64_t>(kd));
 }
diff --git a/libc/src/math/generic/range_reduction_double_nofma.h b/libc/src/math/generic/range_reduction_double_nofma.h
@@ -34,14 +34,14 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
   x_reduced = xbits.get_val();
   // x * c_hi = ph.hi + ph.lo exactly.
   DoubleDouble x_split = fputil::split(x_reduced);
-  DoubleDouble ph = fputil::exact_mult<NO_FMA>(
-      x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
+  DoubleDouble ph = fputil::exact_mult<SPLIT>(x_split, x_reduced,
+                                              ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
   // x * c_mid = pm.hi + pm.lo exactly.
-  DoubleDouble pm = fputil::exact_mult<NO_FMA>(
-      x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
+  DoubleDouble pm = fputil::exact_mult<SPLIT>(x_split, x_reduced,
+                                              ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
   // x * c_lo = pl.hi + pl.lo exactly.
-  DoubleDouble pl = fputil::exact_mult<NO_FMA>(
-      x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
+  DoubleDouble pl = fputil::exact_mult<SPLIT>(x_split, x_reduced,
+                                              ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
   // Extract integral parts and fractional parts of (ph.lo + pm.hi).
   double sum_hi = ph.lo + pm.hi;
   double kd = fputil::nearest_integer(sum_hi);
@@ -68,7 +68,7 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
   // Then,
   //   | {x * 128/pi} - (y_hi + y_lo) | <=  ulp(ulp(y_hi)) <= 2^-105
   //   | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110
-  u = fputil::quick_mult<NO_FMA>(y, PI_OVER_128_DD);
+  u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD);
 
   return static_cast<unsigned>(static_cast<int64_t>(kd));
 }