Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libc/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
"value": false,
"doc": "Use the same mode for double and long double in printf."
},
"LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320": {
"value": false,
"doc": "Use an alternative printf float implementation based on 320-bit floats"
},
"LIBC_CONF_PRINTF_DISABLE_FIXED_POINT": {
"value": false,
"doc": "Disable printing fixed point values in printf and friends."
Expand Down
1 change: 1 addition & 0 deletions libc/docs/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ to learn about the defaults for your platform and target.
- ``LIBC_CONF_PRINTF_DISABLE_WRITE_INT``: Disable handling of %n in printf format string.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_NO_SPECIALIZE_LD``: Use the same mode for double and long double in printf.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_DYADIC_FLOAT``: Use dyadic float for faster and smaller but less accurate printf doubles.
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320``: Use an alternative printf float implementation based on 320-bit floats
- ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance.
* **"pthread" options**
- ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100).
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/CPP/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ template <class T> LIBC_INLINE constexpr const T &min(const T &a, const T &b) {
return (a < b) ? a : b;
}

template <class T> LIBC_INLINE constexpr T abs(T a) { return a < 0 ? -a : a; }

template <class InputIt, class UnaryPred>
LIBC_INLINE constexpr InputIt find_if_not(InputIt first, InputIt last,
UnaryPred q) {
Expand Down
193 changes: 193 additions & 0 deletions libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,49 @@
namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Decide whether to round a UInt up, down or not at all at a given bit
// position, based on the current rounding mode. The assumption is that the
// caller is going to make the integer `value >> rshift`, and then might need
// to round it up by 1 depending on the value of the bits shifted off the
// bottom.
//
// `logical_sign` causes the behavior of FE_DOWNWARD and FE_UPWARD to
// be reversed, which is what you'd want if this is the mantissa of a
// negative floating-point number.
//
// Return value is +1 if the value should be rounded up; -1 if it should be
// rounded down; 0 if it's exact and needs no rounding.
template <size_t Bits>
LIBC_INLINE constexpr int
rounding_direction(const LIBC_NAMESPACE::UInt<Bits> &value, size_t rshift,
Sign logical_sign) {
if (rshift == 0 || (rshift < Bits && (value << (Bits - rshift)) == 0) ||
(rshift >= Bits && value == 0))
return 0; // exact

switch (quick_get_round()) {
case FE_TONEAREST:
if (rshift > 0 && rshift <= Bits && value.get_bit(rshift - 1)) {
// We round up, unless the value is an exact halfway case and
// the bit that will end up in the units place is 0, in which
// case tie-break-to-even says round down.
return value.get_bit(rshift) != 0 || (value << (Bits - rshift + 1)) != 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like value.get_bit(rshift) will be overflow if rshift == Bits?

? +1
: -1;
} else {
return -1;
}
case FE_TOWARDZERO:
return -1;
case FE_DOWNWARD:
return logical_sign.is_neg() && (value << (Bits - rshift)) != 0 ? +1 : -1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bits - rshift could be negative here?

case FE_UPWARD:
return logical_sign.is_pos() && (value << (Bits - rshift)) != 0 ? +1 : -1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

default:
__builtin_unreachable();
}
}

// A generic class to perform computations of high precision floating points.
// We store the value in dyadic format, including 3 fields:
// sign : boolean value - false means positive, true means negative
Expand Down Expand Up @@ -101,6 +144,27 @@ template <size_t Bits> struct DyadicFloat {
return exponent + (Bits - 1);
}

// Produce a correctly rounded DyadicFloat from a too-large mantissa,
// by shifting it down and rounding if necessary.
template <size_t MantissaBits>
LIBC_INLINE constexpr static DyadicFloat<Bits>
round(Sign result_sign, int result_exponent,
const LIBC_NAMESPACE::UInt<MantissaBits> &input_mantissa,
size_t rshift) {
MantissaType result_mantissa(input_mantissa >> rshift);
if (rounding_direction(input_mantissa, rshift, result_sign) > 0) {
++result_mantissa;
if (result_mantissa == 0) {
// Rounding up made the mantissa integer wrap round to 0,
// carrying a bit off the top. So we've rounded up to the next
// exponent.
result_mantissa.set_bit(Bits - 1);
++result_exponent;
}
}
return DyadicFloat(result_sign, result_exponent, result_mantissa);
}

#ifdef LIBC_TYPES_HAS_FLOAT16
template <typename T, bool ShouldSignalExceptions>
LIBC_INLINE constexpr cpp::enable_if_t<
Expand Down Expand Up @@ -374,6 +438,39 @@ template <size_t Bits> struct DyadicFloat {

return new_mant;
}

LIBC_INLINE constexpr MantissaType
as_mantissa_type_rounded(int *round_dir_out = nullptr) const {
int round_dir = 0;
MantissaType new_mant;
if (mantissa.is_zero()) {
new_mant = 0;
} else {
new_mant = mantissa;
if (exponent > 0) {
new_mant <<= exponent;
} else if (exponent < 0) {
size_t shift = -exponent;
new_mant >>= shift;
round_dir = rounding_direction(mantissa, shift, sign);
if (round_dir > 0)
++new_mant;
}

if (sign.is_neg()) {
new_mant = (~new_mant) + 1;
}
}

if (round_dir_out)
*round_dir_out = round_dir;

return new_mant;
}

LIBC_INLINE constexpr DyadicFloat operator-() const {
return DyadicFloat(sign.negate(), exponent, mantissa);
}
};

// Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize the
Expand Down Expand Up @@ -433,6 +530,12 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
return result.normalize();
}

template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits> quick_sub(DyadicFloat<Bits> a,
DyadicFloat<Bits> b) {
return quick_add(a, -b);
}

// Quick Mul - Slightly less accurate but efficient multiplication of 2 dyadic
// floats with rounding toward 0 and then normalize the output:
// result.exponent = a.exponent + b.exponent + Bits,
Expand Down Expand Up @@ -464,6 +567,96 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
return result;
}

// Correctly rounded multiplication of 2 dyadic floats, assuming the
// exponent remains within range.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
rounded_mul(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b) {
using DblMant = LIBC_NAMESPACE::UInt<(2 * Bits)>;
Sign result_sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
int result_exponent = a.exponent + b.exponent + static_cast<int>(Bits);
auto product = DblMant(a.mantissa) * DblMant(b.mantissa);
// As in quick_mul(), renormalize by 1 bit manually rather than countl_zero
if (product.get_bit(2 * Bits - 1) == 0) {
product <<= 1;
result_exponent -= 1;
}

return DyadicFloat<Bits>::round(result_sign, result_exponent, product, Bits);
}

// Approximate reciprocal - given a nonzero a, make a good approximation to 1/a.
// The method is Newton-Raphson iteration, based on quick_mul.
template <size_t Bits, typename = cpp::enable_if_t<(Bits >= 32)>>
LIBC_INLINE constexpr DyadicFloat<Bits>
approx_reciprocal(const DyadicFloat<Bits> &a) {
// Given an approximation x to 1/a, a better one is x' = x(2-ax).
//
// You can derive this by using the Newton-Raphson formula with the function
// f(x) = 1/x - a. But another way to see that it works is to say: suppose
// that ax = 1-e for some small error e. Then ax' = ax(2-ax) = (1-e)(1+e) =
// 1-e^2. So the error in x' is the square of the error in x, i.e. the number
// of correct bits in x' is double the number in x.

// An initial approximation to the reciprocal
DyadicFloat<Bits> x(Sign::POS, -32 - a.exponent - Bits,
uint64_t(0xFFFFFFFFFFFFFFFF) /
static_cast<uint64_t>(a.mantissa >> (Bits - 32)));

// The constant 2, which we'll need in every iteration
DyadicFloat<Bits> two(Sign::POS, 1, 1);

// We expect at least 31 correct bits from our 32-bit starting approximation
size_t ok_bits = 31;

// The number of good bits doubles in each iteration, except that rounding
// errors introduce a little extra each time. Subtract a bit from our
// accuracy assessment to account for that.
while (ok_bits < Bits) {
x = quick_mul(x, quick_sub(two, quick_mul(a, x)));
ok_bits = 2 * ok_bits - 1;
}

return x;
}

// Correctly rounded division of 2 dyadic floats, assuming the
// exponent remains within range.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
rounded_div(const DyadicFloat<Bits> &af, const DyadicFloat<Bits> &bf) {
using DblMant = LIBC_NAMESPACE::UInt<(Bits * 2 + 64)>;

// Make an approximation to the quotient as a * (1/b). Both the
// multiplication and the reciprocal are a bit sloppy, which doesn't
// matter, because we're going to correct for that below.
auto qf = fputil::quick_mul(af, fputil::approx_reciprocal(bf));

// Switch to BigInt and stop using quick_add and quick_mul: now
// we're working in exact integers so as to get the true remainder.
DblMant a = af.mantissa, b = bf.mantissa, q = qf.mantissa;
q <<= 2; // leave room for a round bit, even if exponent decreases
a <<= af.exponent - bf.exponent - qf.exponent + 2;
DblMant qb = q * b;
if (qb < a) {
DblMant too_small = a - b;
while (qb <= too_small) {
qb += b;
++q;
}
} else {
while (qb > a) {
qb -= b;
--q;
}
}

DyadicFloat<(Bits * 2)> qbig(qf.sign, qf.exponent - 2, q);
auto qfinal = DyadicFloat<Bits>::round(qbig.sign, qbig.exponent + Bits,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: just return DyadicFloat<Bits>::round...

qbig.mantissa, Bits);
return qfinal;
}

// Simple polynomial approximation.
template <size_t Bits>
LIBC_INLINE constexpr DyadicFloat<Bits>
Expand Down
20 changes: 13 additions & 7 deletions libc/src/__support/big_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,18 @@ struct BigInt {
// Return the i-th word of the number.
LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; }

// Return the i-th bit of the number.
LIBC_INLINE constexpr bool get_bit(size_t i) const {
const size_t word_index = i / WORD_SIZE;
return 1 & (val[word_index] >> (i % WORD_SIZE));
}

// Set the i-th bit of the number.
LIBC_INLINE constexpr void set_bit(size_t i) {
const size_t word_index = i / WORD_SIZE;
val[word_index] |= WordType(1) << (i % WORD_SIZE);
}

private:
LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) {
constexpr auto compare = [](WordType a, WordType b) {
Expand Down Expand Up @@ -968,7 +980,7 @@ struct BigInt {
}

LIBC_INLINE constexpr void decrement() {
multiword::add_with_carry(val, cpp::array<WordType, 1>{1});
multiword::sub_with_borrow(val, cpp::array<WordType, 1>{1});
}

LIBC_INLINE constexpr void extend(size_t index, bool is_neg) {
Expand All @@ -989,12 +1001,6 @@ struct BigInt {
LIBC_INLINE constexpr void clear_msb() {
val.back() &= mask_trailing_ones<WordType, WORD_SIZE - 1>();
}

LIBC_INLINE constexpr void set_bit(size_t i) {
const size_t word_index = i / WORD_SIZE;
val[word_index] |= WordType(1) << (i % WORD_SIZE);
}

LIBC_INLINE constexpr static Division divide_unsigned(const BigInt &dividend,
const BigInt &divider) {
BigInt remainder = dividend;
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/sign.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ struct Sign {
static const Sign POS;
static const Sign NEG;

LIBC_INLINE constexpr Sign negate() const { return Sign(!is_negative); }

private:
LIBC_INLINE constexpr explicit Sign(bool is_negative)
: is_negative(is_negative) {}
Expand Down
3 changes: 3 additions & 0 deletions libc/src/stdio/printf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ endif()
if(LIBC_CONF_PRINTF_FLOAT_TO_STR_NO_SPECIALIZE_LD)
list(APPEND printf_config_copts "-DLIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD")
endif()
if(LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320)
list(APPEND printf_config_copts "-DLIBC_COPT_FLOAT_TO_STR_USE_FLOAT320")
endif()
if(LIBC_CONF_PRINTF_DISABLE_FIXED_POINT)
list(APPEND printf_config_copts "-DLIBC_COPT_PRINTF_DISABLE_FIXED_POINT")
endif()
Expand Down
4 changes: 4 additions & 0 deletions libc/src/stdio/printf_core/converter_atlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
// defines convert_float_decimal
// defines convert_float_dec_exp
// defines convert_float_dec_auto
#ifdef LIBC_COPT_FLOAT_TO_STR_USE_FLOAT320
#include "src/stdio/printf_core/float_dec_converter_limited.h"
#else
#include "src/stdio/printf_core/float_dec_converter.h"
#endif
// defines convert_float_hex_exp
#include "src/stdio/printf_core/float_hex_converter.h"
#endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
Expand Down
Loading