@@ -1041,6 +1041,22 @@ inline auto digits2(size_t value) noexcept -> const char* {
10411041 return &data[value * 2 ];
10421042}
10431043
// Lookup table for two-digit formatting driven by a fixed-point fraction
// instead of a remainder.
//
// For i in [0, 100), let x be the first 7 binary digits after the point of
// i / 100, i.e. x = floor(i * 128 / 100). The two bytes at digits2_i(x) are
// the decimal representation of i ("00" .. "99").
//
// The table has 128 two-byte slots (256 bytes plus the terminating NUL):
//  * slots that no i maps to are filled with two spaces;
//  * the entries for 07, 14, 32, 39, 57, 64, 82 and 89 appear twice in a row,
//    so that an index computed from a slightly rounded-up reciprocal of 100
//    (which may land one slot above floor(i * 128 / 100) for exactly these
//    values of i) still resolves to the right digits.
//
// value must be less than 128. Only the two bytes at the returned address are
// meaningful; the result is not a null-terminated two-character string.
inline auto digits2_i(size_t value) noexcept -> const char* {
  // Each row below is exactly 32 bytes (16 slots); the padding is significant.
  alignas(2) static const char data[] =
      "00010203  0405060707080910  1112"
      "131414151617  18192021  222324  "
      "25262728  2930313232333435  3637"
      "383939404142  43444546  474849  "
      "50515253  5455565757585960  6162"
      "636464656667  68697071  727374  "
      "75767778  7980818282838485  8687"
      "888989909192  93949596  979899  ";
  // Guards against accidental whitespace loss in the literals above.
  static_assert(sizeof(data) == 128 * 2 + 1, "digits2_i table must hold 128 slots");
  return &data[value * 2];
}
1059+
10441060template <typename Char> constexpr auto getsign (sign s) -> Char {
10451061 return static_cast <char >(((' ' << 24 ) | (' +' << 16 ) | (' -' << 8 )) >>
10461062 (static_cast <int >(s) * 8 ));
@@ -1209,6 +1225,16 @@ FMT_CONSTEXPR20 FMT_INLINE void write2digits(Char* out, size_t value) {
12091225 *out = static_cast <Char>(' 0' + value % 10 );
12101226}
12111227
1228+ template <typename Char>
1229+ FMT_INLINE void write2digits_i (Char* out, size_t value) {
1230+ if (std::is_same<Char, char >::value && !FMT_OPTIMIZE_SIZE) {
1231+ memcpy (out, digits2_i (value), 2 );
1232+ return ;
1233+ }
1234+ *out++ = static_cast <Char>(digits2_i (value)[0 ]);
1235+ *out = static_cast <Char>(digits2_i (value)[1 ]);
1236+ }
1237+
12121238// Formats a decimal unsigned integer value writing to out pointing to a buffer
12131239// of specified size. The caller must ensure that the buffer is large enough.
12141240template <typename Char, typename UInt>
@@ -1217,12 +1243,19 @@ FMT_CONSTEXPR20 auto do_format_decimal(Char* out, UInt value, int size)
12171243 FMT_ASSERT (size >= count_digits (value), " invalid digit count" );
12181244 unsigned n = to_unsigned (size);
12191245 while (value >= 100 ) {
1220- // Integer division is slow so do it for a group of two digits instead
1221- // of for every digit. The idea comes from the talk by Alexandrescu
1222- // "Three Optimization Tips for C++". See speed-test for a comparison.
12231246 n -= 2 ;
1224- write2digits (out + n, static_cast <unsigned >(value % 100 ));
1225- value /= 100 ;
1247+ if (!is_constant_evaluated () && sizeof (UInt) == 4 ) {
1248+ auto p = value * static_cast <uint64_t >((1ull << 39 ) / 100 + 1 );
1249+ write2digits_i (out + n, p >> (39 - 7 ) & ((1 << 7 ) - 1 ));
1250+ value = static_cast <UInt>(p >> 39 ) +
1251+ (static_cast <UInt>(value >= (100u << 25 )) << 25 );
1252+ } else {
1253+ // Integer division is slow so do it for a group of two digits instead
1254+ // of for every digit. The idea comes from the talk by Alexandrescu
1255+ // "Three Optimization Tips for C++". See speed-test for a comparison.
1256+ write2digits (out + n, static_cast <unsigned >(value % 100 ));
1257+ value /= 100 ;
1258+ }
12261259 }
12271260 if (value >= 10 ) {
12281261 n -= 2 ;
0 commit comments