Skip to content

Commit cbced2b

Browse files
committed
Address comments.
1 parent 6c1cc4c commit cbced2b

File tree

5 files changed

+28
-26
lines changed

5 files changed

+28
-26
lines changed

libc/src/__support/FPUtil/double_double.h

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
namespace LIBC_NAMESPACE_DECL {
1919
namespace fputil {
2020

21+
#define DEFAULT_DOUBLE_SPLIT 27
22+
2123
using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
2224

2325
// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
@@ -61,7 +63,8 @@ LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
6163
// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
6264
// Roundings," https://inria.hal.science/hal-04480440.
6365
// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
64-
template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
66+
template <size_t N = DEFAULT_DOUBLE_SPLIT>
67+
LIBC_INLINE constexpr DoubleDouble split(double a) {
6568
DoubleDouble r{0.0, 0.0};
6669
// CN = 2^N.
6770
constexpr double CN = static_cast<double>(1 << N);
@@ -73,16 +76,12 @@ template <size_t N = 27> LIBC_INLINE constexpr DoubleDouble split(double a) {
7376
return r;
7477
}
7578

76-
// Helper for non-fma exact mult where the first number is already splitted.
77-
template <bool NO_FMA_ALL_ROUNDINGS = false>
79+
// Helper for non-fma exact mult where the first number is already split.
80+
template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT>
7881
LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
7982
double b) {
80-
DoubleDouble bs, r;
81-
82-
if constexpr (NO_FMA_ALL_ROUNDINGS)
83-
bs = split<28>(b);
84-
else
85-
bs = split(b);
83+
DoubleDouble bs = split<SPLIT_B>(b);
84+
DoubleDouble r{0.0, 0.0};
8685

8786
r.hi = a * b;
8887
double t1 = as.hi * bs.hi - r.hi;
@@ -100,7 +99,7 @@ LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
10099
// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
101100
// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
102101
// then a * b = r.hi + r.lo is exact for all rounding modes.
103-
template <bool NO_FMA_ALL_ROUNDINGS = false>
102+
template <size_t SPLIT_B = 27>
104103
LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
105104
DoubleDouble r{0.0, 0.0};
106105

@@ -111,7 +110,7 @@ LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
111110
// Dekker's Product.
112111
DoubleDouble as = split(a);
113112

114-
r = exact_mult<NO_FMA_ALL_ROUNDINGS>(as, a, b);
113+
r = exact_mult<SPLIT_B>(as, a, b);
115114
#endif // LIBC_TARGET_CPU_HAS_FMA
116115

117116
return r;

libc/src/__support/macros/optimization.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
5050
#define LIBC_MATH 0
5151
#else
5252

53-
#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
53+
#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
5454
#define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
5555
#endif
5656

57-
#if ((LIBC_MATH & LIBC_MATH_SMALL_TABLES) != 0)
57+
#if (LIBC_MATH & LIBC_MATH_SMALL_TABLES)
5858
#define LIBC_MATH_HAS_SMALL_TABLES
5959
#endif
6060

libc/src/math/generic/range_reduction_double_common.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222
namespace LIBC_NAMESPACE_DECL {
2323

2424
#ifdef LIBC_TARGET_CPU_HAS_FMA
25-
static constexpr bool NO_FMA = false;
25+
static constexpr unsigned SPLIT = DEFAULT_DOUBLE_SPLIT;
2626
#else
27-
static constexpr bool NO_FMA = true;
27+
// When there is no-FMA instructions, in order to have exact product of 2 double
28+
// precision with directional roundings, we need to lower the precision of the
29+
// constants by at least 1 bit, and use a different splitting constant.
30+
static constexpr unsigned SPLIT = 28;
2831
#endif // LIBC_TARGET_CPU_HAS_FMA
2932

3033
using LIBC_NAMESPACE::fputil::DoubleDouble;

libc/src/math/generic/range_reduction_double_fma.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
3434
x_reduced = xbits.get_val();
3535
// x * c_hi = ph.hi + ph.lo exactly.
3636
DoubleDouble ph =
37-
fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
37+
fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
3838
// x * c_mid = pm.hi + pm.lo exactly.
3939
DoubleDouble pm =
40-
fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
40+
fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
4141
// x * c_lo = pl.hi + pl.lo exactly.
4242
DoubleDouble pl =
43-
fputil::exact_mult<NO_FMA>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
43+
fputil::exact_mult<SPLIT>(x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
4444
// Extract integral parts and fractional parts of (ph.lo + pm.hi).
4545
double sum_hi = ph.lo + pm.hi;
4646
double kd = fputil::nearest_integer(sum_hi);
@@ -67,7 +67,7 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
6767
// Then,
6868
// | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105
6969
// | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110
70-
u = fputil::quick_mult<NO_FMA>(y, PI_OVER_128_DD);
70+
u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD);
7171

7272
return static_cast<unsigned>(static_cast<int64_t>(kd));
7373
}

libc/src/math/generic/range_reduction_double_nofma.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,14 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
3434
x_reduced = xbits.get_val();
3535
// x * c_hi = ph.hi + ph.lo exactly.
3636
DoubleDouble x_split = fputil::split(x_reduced);
37-
DoubleDouble ph = fputil::exact_mult<NO_FMA>(
38-
x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
37+
DoubleDouble ph = fputil::exact_mult<SPLIT>(x_split, x_reduced,
38+
ONE_TWENTY_EIGHT_OVER_PI[idx][0]);
3939
// x * c_mid = pm.hi + pm.lo exactly.
40-
DoubleDouble pm = fputil::exact_mult<NO_FMA>(
41-
x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
40+
DoubleDouble pm = fputil::exact_mult<SPLIT>(x_split, x_reduced,
41+
ONE_TWENTY_EIGHT_OVER_PI[idx][1]);
4242
// x * c_lo = pl.hi + pl.lo exactly.
43-
DoubleDouble pl = fputil::exact_mult<NO_FMA>(
44-
x_split, x_reduced, ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
43+
DoubleDouble pl = fputil::exact_mult<SPLIT>(x_split, x_reduced,
44+
ONE_TWENTY_EIGHT_OVER_PI[idx][2]);
4545
// Extract integral parts and fractional parts of (ph.lo + pm.hi).
4646
double sum_hi = ph.lo + pm.hi;
4747
double kd = fputil::nearest_integer(sum_hi);
@@ -68,7 +68,7 @@ LIBC_INLINE unsigned LargeRangeReduction::fast(double x, DoubleDouble &u) {
6868
// Then,
6969
// | {x * 128/pi} - (y_hi + y_lo) | <= ulp(ulp(y_hi)) <= 2^-105
7070
// | {x mod pi/128} - (u.hi + u.lo) | < 2 * 2^-6 * 2^-105 = 2^-110
71-
u = fputil::quick_mult<NO_FMA>(y, PI_OVER_128_DD);
71+
u = fputil::quick_mult<SPLIT>(y, PI_OVER_128_DD);
7272

7373
return static_cast<unsigned>(static_cast<int64_t>(kd));
7474
}

0 commit comments

Comments
 (0)