Skip to content

Commit b7fb05b

Browse files
committed
* Added an additional compile option FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED to improve cache usage under high load.
* Small optimization in code generation for auto vectorization.
1 parent 7041f91 commit b7fb05b

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,13 +383,15 @@ int main() {
383383
There is a really common use case in mathematical and other abstract syntax tree (AST)-like parsers that already process
384384
the sign and all other symbols before any number by themselves. In this case you can use FastFloat to only parse positive numbers
385385
in all supported formats with the macro `FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN`, which can significantly reduce the code size
386-
and improve performance. You also can use macros `FASTFLOAT_ISNOT_CHECKED_BOUNDS` if your code already checks bounds;
387-
it's very likely because all parsers need to check the first character by itself before parsing. Additionally, you can use
388-
macros `FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED` if you only need `FE_TONEAREST` rounding mode in the parsing;
389-
this option also improves performance a bit and reduces code size.
390-
386+
and improve performance. An additional option for high performance and very fast processing is
387+
`FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED`; it reduces data size and speeds up parsing because the data cache is then used for your
388+
real data, not for a 256-byte table that evicts at least 3 cache lines on x86. You can also use the macro
389+
`FASTFLOAT_ISNOT_CHECKED_BOUNDS` if your code already checks bounds; this is very likely, because all parsers need to check the first
390+
character themselves before parsing. Additionally, you can use the macro `FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED` if you only need
391+
`FE_TONEAREST` rounding mode when parsing; this option also improves performance a bit and reduces code size.
391392
```C++
392393
#define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
394+
#define FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
393395
#define FASTFLOAT_ISNOT_CHECKED_BOUNDS
394396
#define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
395397
#include "fast_float/fast_float.h"

benchmarks/benchmark.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
// #define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
3+
// #define FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
34
// #define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
45
// #define FASTFLOAT_ISNOT_CHECKED_BOUNDS
56

@@ -234,6 +235,10 @@ int main(int argc, char **argv) {
234235
std::cout << "# FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN is enabled"
235236
<< std::endl;
236237
#endif
238+
#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
239+
std::cout << "# FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED is enabled"
240+
<< std::endl;
241+
#endif
237242
#ifdef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
238243
std::cout << "# FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED is enabled"
239244
<< std::endl;

include/fast_float/ascii_number.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,14 @@ parse_int_string(UC const *p, UC const *pend, T &value,
642642
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
643643
}
644644
while (p != pend) {
645+
#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
646+
const auto digit = *p;
647+
if (!is_integer(digit)) {
648+
break;
649+
}
650+
#else
645651
auto const digit = ch_to_digit(*p);
652+
#endif
646653
if (digit >= options.base) {
647654
break;
648655
}

include/fast_float/float_common.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ typedef int_fast16_t am_pow_t;
513513
// Bias so we can get the real exponent with an invalid adjusted_mantissa.
514514
constexpr static am_pow_t invalid_am_bias = -0x8000;
515515

516-
struct adjusted_mantissa {
516+
struct alignas(16) adjusted_mantissa {
517517
am_mant_t mantissa;
518518
am_pow_t power2;
519519
adjusted_mantissa() noexcept = default;
@@ -1201,6 +1201,7 @@ template <> constexpr char8_t const *str_const_inf<char8_t>() {
12011201
#endif
12021202

12031203
template <typename = void> struct int_luts {
1204+
#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
12041205
static constexpr uint8_t chdigit[] = {
12051206
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
12061207
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
@@ -1220,6 +1221,7 @@ template <typename = void> struct int_luts {
12201221
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
12211222
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
12221223
255};
1224+
#endif
12231225

12241226
static constexpr uint_fast8_t maxdigits_u64[] = {
12251227
64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
@@ -1250,6 +1252,7 @@ template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
12501252

12511253
#endif
12521254

1255+
#ifndef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
12531256
template <typename UC>
12541257
fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
12551258
// wchar_t and char can be signed, so we need to be careful.
@@ -1259,6 +1262,7 @@ fastfloat_really_inline constexpr uint_fast8_t ch_to_digit(UC c) noexcept {
12591262
static_cast<UnsignedUC>(
12601263
-((static_cast<UnsignedUC>(c) & ~0xFFull) == 0)))];
12611264
}
1265+
#endif
12621266

12631267
fastfloat_really_inline constexpr uint_fast8_t
12641268
max_digits_u64(uint_fast8_t base) noexcept {

0 commit comments

Comments
 (0)