@@ -519,6 +519,11 @@ template <typename T, typename OutputIt>
519519constexpr auto to_pointer (OutputIt, size_t ) -> T* {
520520 return nullptr ;
521521}
522+ template <typename T> FMT_CONSTEXPR auto to_pointer (T*& ptr, size_t n) -> T* {
523+ T* begin = ptr;
524+ ptr += n;
525+ return begin;
526+ }
522527template <typename T>
523528FMT_CONSTEXPR20 auto to_pointer (basic_appender<T> it, size_t n) -> T* {
524529 buffer<T>& buf = get_container (it);
@@ -1169,8 +1174,9 @@ FMT_CONSTEXPR20 FMT_INLINE void write2digits(Char* out, size_t value) {
11691174 *out = static_cast <Char>(' 0' + value % 10 );
11701175}
11711176
1172- // Formats a decimal unsigned integer value writing to out pointing to a buffer
1173- // of specified size. The caller must ensure that the buffer is large enough.
1177+ // Formats a decimal unsigned integer value and writes to out pointing to a
1178+ // buffer of specified size. The caller must ensure that the buffer is large
1179+ // enough.
11741180template <typename Char, typename UInt>
11751181FMT_CONSTEXPR20 auto do_format_decimal (Char* out, UInt value, int size)
11761182 -> Char* {
@@ -1455,6 +1461,75 @@ template <typename T> struct decimal_fp {
14551461template <typename T> FMT_API auto to_decimal (T x) noexcept -> decimal_fp<T>;
14561462} // namespace dragonbox
14571463
1464+ // Compilers should be able to optimize this into the ror instruction.
1465+ FMT_CONSTEXPR inline auto rotr (uint32_t n, uint32_t r) noexcept -> uint32_t {
1466+ r &= 31 ;
1467+ return (n >> r) | (n << (32 - r));
1468+ }
1469+ FMT_CONSTEXPR inline auto rotr (uint64_t n, uint32_t r) noexcept -> uint64_t {
1470+ r &= 63 ;
1471+ return (n >> r) | (n << (64 - r));
1472+ }
1473+
1474+ // Remove trailing zeros from n and return the number of zeros removed (float)
1475+ FMT_INLINE int remove_trailing_zeros (uint32_t & n, int s = 0 ) noexcept {
1476+ FMT_ASSERT (n != 0 , " " );
1477+ // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1.
1478+ constexpr uint32_t mod_inv_5 = 0xcccccccd ;
1479+ constexpr uint32_t mod_inv_25 = 0xc28f5c29 ; // = mod_inv_5 * mod_inv_5
1480+
1481+ while (true ) {
1482+ auto q = rotr (n * mod_inv_25, 2 );
1483+ if (q > max_value<uint32_t >() / 100 ) break ;
1484+ n = q;
1485+ s += 2 ;
1486+ }
1487+ auto q = rotr (n * mod_inv_5, 1 );
1488+ if (q <= max_value<uint32_t >() / 10 ) {
1489+ n = q;
1490+ s |= 1 ;
1491+ }
1492+ return s;
1493+ }
1494+
1495+ // Removes trailing zeros and returns the number of zeros removed (double)
1496+ FMT_INLINE int remove_trailing_zeros (uint64_t & n) noexcept {
1497+ FMT_ASSERT (n != 0 , " " );
1498+
1499+ // This magic number is ceil(2^90 / 10^8).
1500+ constexpr uint64_t magic_number = 12379400392853802749ull ;
1501+ auto nm = umul128 (n, magic_number);
1502+
1503+ // Is n is divisible by 10^8?
1504+ if ((nm.high () & ((1ull << (90 - 64 )) - 1 )) == 0 && nm.low () < magic_number) {
1505+ // If yes, work with the quotient...
1506+ auto n32 = static_cast <uint32_t >(nm.high () >> (90 - 64 ));
1507+ // ... and use the 32 bit variant of the function
1508+ int s = remove_trailing_zeros (n32, 8 );
1509+ n = n32;
1510+ return s;
1511+ }
1512+
1513+ // If n is not divisible by 10^8, work with n itself.
1514+ constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd ;
1515+ constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29 ; // mod_inv_5 * mod_inv_5
1516+
1517+ int s = 0 ;
1518+ while (true ) {
1519+ auto q = rotr (n * mod_inv_25, 2 );
1520+ if (q > max_value<uint64_t >() / 100 ) break ;
1521+ n = q;
1522+ s += 2 ;
1523+ }
1524+ auto q = rotr (n * mod_inv_5, 1 );
1525+ if (q <= max_value<uint64_t >() / 10 ) {
1526+ n = q;
1527+ s |= 1 ;
1528+ }
1529+
1530+ return s;
1531+ }
1532+
14581533// Returns true iff Float has the implicit bit which is not stored.
14591534template <typename Float> constexpr auto has_implicit_bit () -> bool {
14601535 // An 80-bit FP number has a 64-bit significand and no implicit bit.
@@ -1486,7 +1561,7 @@ template <typename Float> constexpr auto exponent_bias() -> int {
14861561FMT_CONSTEXPR inline auto compute_exp_size (int exp) -> int {
14871562 auto prefix_size = 2 ; // sign + 'e'
14881563 auto abs_exp = exp >= 0 ? exp : -exp;
1489- if (exp < 100 ) return prefix_size + 2 ;
1564+ if (abs_exp < 100 ) return prefix_size + 2 ;
14901565 return prefix_size + (abs_exp >= 1000 ? 4 : 3 );
14911566}
14921567
@@ -3413,6 +3488,8 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value, format_specs specs,
34133488 } else if (is_fast_float<T>::value && !is_constant_evaluated ()) {
34143489 // Use Dragonbox for the shortest format.
34153490 auto dec = dragonbox::to_decimal (static_cast <fast_float_t <T>>(value));
3491+ if (dec.significand != 0 )
3492+ dec.exponent += remove_trailing_zeros (dec.significand );
34163493 return write_float<Char>(out, dec, specs, s, exp_upper, loc);
34173494 }
34183495 }
@@ -3455,9 +3532,29 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt {
34553532 return write_nonfinite<Char>(out, std::isnan (value), {}, s);
34563533
34573534 auto dec = dragonbox::to_decimal (static_cast <fast_float_t <T>>(value));
3458- int significand_size = count_digits (dec.significand );
3459- int exp = dec.exponent + significand_size - 1 ;
3460- if (use_fixed (exp, detail::exp_upper<T>())) {
3535+ auto significand = dec.significand ;
3536+ auto exponent = dec.exponent ;
3537+
3538+ uint32_t block1, block2 = 0 ;
3539+ int num_block2_digits = 0 ;
3540+ constexpr unsigned ten_pow_8 = 100000000u ;
3541+ if (significand >= ten_pow_8) {
3542+ block1 = static_cast <unsigned >(significand / ten_pow_8);
3543+ block2 = static_cast <unsigned >(significand) - block1 * ten_pow_8;
3544+ if (block2 != 0 ) num_block2_digits = 8 - remove_trailing_zeros (block2);
3545+ exponent += 8 ;
3546+ } else {
3547+ block1 = static_cast <unsigned >(significand);
3548+ }
3549+ if (block2 == 0 && block1 != 0 ) exponent += remove_trailing_zeros (block1);
3550+
3551+ int num_block1_digits = count_digits (block1);
3552+ exponent += num_block1_digits - 1 ;
3553+ int significand_size = num_block1_digits + num_block2_digits;
3554+
3555+ if (use_fixed (exponent, detail::exp_upper<T>())) {
3556+ if (dec.significand != 0 )
3557+ dec.exponent += remove_trailing_zeros (dec.significand );
34613558 return write_fixed<Char, fallback_digit_grouping<Char>>(
34623559 out, dec, significand_size, Char (' .' ), {}, s);
34633560 }
@@ -3466,14 +3563,43 @@ FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt {
34663563 auto has_decimal_point = significand_size != 1 ;
34673564 size_t size =
34683565 to_unsigned ((s != sign::none ? 1 : 0 ) + significand_size +
3469- (has_decimal_point ? 1 : 0 ) + compute_exp_size (exp));
3566+ (has_decimal_point ? 1 : 0 ) + compute_exp_size (exponent));
3567+
3568+ if (auto ptr = to_pointer<Char>(out, size)) {
3569+ if (s != sign::none) *ptr++ = Char (' -' );
3570+ if (has_decimal_point) {
3571+ auto begin = ptr;
3572+ ptr = format_decimal<Char>(ptr, block1, num_block1_digits + 1 );
3573+ *begin = begin[1 ];
3574+ begin[1 ] = ' .' ;
3575+ if (num_block2_digits != 0 ) {
3576+ int n = num_block2_digits;
3577+ while (n > 2 ) {
3578+ n -= 2 ;
3579+ write2digits (ptr + n, block2 % 100 );
3580+ block2 /= 100 ;
3581+ }
3582+ if (n > 1 ) {
3583+ n -= 2 ;
3584+ write2digits (ptr + n, block2);
3585+ } else {
3586+ ptr[--n] = static_cast <Char>(' 0' + block2);
3587+ }
3588+ ptr += num_block2_digits;
3589+ }
3590+ } else {
3591+ *ptr++ = static_cast <Char>(' 0' + block1);
3592+ }
3593+ *ptr++ = Char (' e' );
3594+ ptr = write_exponent<Char>(exponent, ptr);
3595+ return out;
3596+ }
34703597 auto it = reserve (out, size);
34713598 if (s != sign::none) *it++ = Char (' -' );
3472- // Insert a decimal point after the first digit and add an exponent.
3473- it = write_significand (it, dec.significand , significand_size, 1 ,
3599+ it = write_significand (it, significand, significand_size, 1 ,
34743600 has_decimal_point ? Char (' .' ) : Char ());
34753601 *it++ = Char (' e' );
3476- it = write_exponent<Char>(exp , it);
3602+ it = write_exponent<Char>(exponent , it);
34773603 return base_iterator (out, it);
34783604}
34793605
0 commit comments