Skip to content

Commit 8e1fda5

Browse files
committed
fixes and cleanup for the parse_number_string function.
The exponent value always fits in int16_t. Original main: Tests: time is: 44278ms, size of my tests 389.0k, size of my program 164.0k. My main: Tests: time is: 42015ms, size of my tests 389.0k, size of my program 164.0k. My main with FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN: Tests: time is: 41282ms, size of my tests 386.5k, size of my program 161.5k. After this I'll try it on my partner's Linux machine with the original tests to get a much better comparison.
1 parent 2da25b5 commit 8e1fda5

File tree

6 files changed

+143
-138
lines changed

6 files changed

+143
-138
lines changed

include/fast_float/ascii_number.h

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
5050
read8_to_u64(UC const *chars) {
5151
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
5252
uint64_t val = 0;
53-
for (int i = 0; i < 8; ++i) {
53+
for (uint8_t i = 0; i != 8; ++i) {
5454
val |= uint64_t(uint8_t(*chars)) << (i * 8);
5555
++chars;
5656
}
@@ -261,7 +261,7 @@ enum class parse_error {
261261

262262
template <typename UC> struct parsed_number_string_t {
263263
uint64_t mantissa{0};
264-
int32_t exponent{0};
264+
int16_t exponent{0};
265265
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
266266
bool negative{false};
267267
#endif
@@ -327,18 +327,17 @@ parse_number_string(UC const *p, UC const *pend,
327327

328328
UC const *const start_digits = p;
329329

330-
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
331-
330+
// an unsigned int avoids signed overflows (which are bad)
332331
while ((p != pend) && is_integer(*p)) {
333332
// a multiplication by 10 is cheaper than an arbitrary integer
334333
// multiplication
335-
i = 10 * i +
334+
answer.mantissa = 10 * answer.mantissa +
336335
uint64_t(*p -
337336
UC('0')); // might overflow, we will handle the overflow later
338337
++p;
339338
}
340339
UC const *const end_of_integer_part = p;
341-
uint32_t digit_count = uint32_t(end_of_integer_part - start_digits);
340+
uint16_t digit_count = uint16_t(end_of_integer_part - start_digits);
342341
answer.integer = span<UC const>(start_digits, digit_count);
343342
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
344343
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
@@ -353,43 +352,46 @@ parse_number_string(UC const *p, UC const *pend,
353352
}
354353
#endif
355354

356-
int32_t exponent = 0;
357355
bool const has_decimal_point = (p != pend) && (*p == options.decimal_point);
358356
if (has_decimal_point) {
359357
++p;
360358
UC const *before = p;
359+
uint16_t fraction = 0;
361360
// can occur at most twice without overflowing, but let it occur more, since
362361
// for integers with many digits, digit parsing is the primary bottleneck.
363-
loop_parse_if_eight_digits(p, pend, i);
362+
loop_parse_if_eight_digits(p, pend, answer.mantissa);
364363

365364
while ((p != pend) && is_integer(*p)) {
366365
uint8_t const digit = uint8_t(*p - UC('0'));
367-
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
366+
answer.mantissa = answer.mantissa * 10 + digit; // in rare cases, this will overflow, but that's ok
368367
++p;
369368
}
370-
exponent = int32_t(before - p);
371-
answer.fraction = span<UC const>(before, uint32_t(p - before));
372-
digit_count -= exponent;
373-
}
369+
fraction = uint16_t(before - p);
370+
answer.fraction = span<UC const>(before, uint16_t(p - before));
371+
digit_count -= fraction;
374372
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
375-
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
376-
// at least 1 digit in fractional part
377-
if (has_decimal_point && exponent == 0) {
378-
return report_parse_error<UC>(p,
373+
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
374+
// at least 1 digit in fractional part
375+
if (has_decimal_point && fraction == 0) {
376+
return report_parse_error<UC>(p,
379377
parse_error::no_digits_in_fractional_part);
378+
}
380379
}
381-
}
382380
#endif
381+
}
383382
else if (digit_count == 0) { // we must have encountered at least one integer!
384383
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
385384
}
386-
int32_t exp_number = 0; // explicit exponential part
385+
// We have now parsed the integer and the fraction part of the mantissa.
386+
387+
// Now we can parse the exponent part.
387388
if (p != pend &&
388389
(uint8_t(options.format & chars_format::scientific) &&
389-
((UC('e') == *p) || (UC('E') == *p)))
390+
(UC('e') == *p) || (UC('E') == *p))
390391
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
391392
|| (uint8_t(options.format & detail::basic_fortran_fmt) &&
392-
(UC('d') == *p) || (UC('D') == *p))
393+
((UC('+') == *p) || (UC('-') == *p) ||
394+
(UC('d') == *p) || (UC('D') == *p)))
393395
#endif
394396
) {
395397
UC const *location_of_e = p;
@@ -416,14 +418,16 @@ parse_number_string(UC const *p, UC const *pend,
416418
p = location_of_e;
417419
} else {
418420
while ((p != pend) && is_integer(*p)) {
419-
uint8_t const digit = uint8_t(*p - UC('0'));
420-
exp_number = 10 * exp_number + digit;
421+
if (answer.exponent < 0x1000) {
422+
// check for exponent overflow if we have too many digits.
423+
uint8_t const digit = uint8_t(*p - UC('0'));
424+
answer.exponent = 10 * answer.exponent + digit;
425+
}
421426
++p;
422427
}
423428
if (neg_exp) {
424-
exp_number = -exp_number;
429+
answer.exponent = -answer.exponent;
425430
}
426-
exponent += exp_number;
427431
}
428432
} else {
429433
// If it scientific and not fixed, we have to bail out.
@@ -459,30 +463,28 @@ parse_number_string(UC const *p, UC const *pend,
459463
// Let us start again, this time, avoiding overflows.
460464
// We don't need to check if is_integer, since we use the
461465
// pre-tokenized spans from above.
462-
i = 0;
466+
answer.mantissa = 0;
463467
p = answer.integer.ptr;
464468
UC const *int_end = p + answer.integer.len();
465469
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
466-
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
467-
i = i * 10 + uint64_t(*p - UC('0'));
470+
while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != int_end)) {
471+
answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0'));
468472
++p;
469473
}
470-
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
471-
exponent = uint32_t(end_of_integer_part - p) + exp_number;
474+
if (answer.mantissa >= minimal_nineteen_digit_integer) { // We have a big integers
475+
answer.exponent += int16_t(end_of_integer_part - p);
472476
} else { // We have a value with a fractional component.
473477
p = answer.fraction.ptr;
474478
UC const *frac_end = p + answer.fraction.len();
475-
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
476-
i = i * 10 + uint64_t(*p - UC('0'));
479+
while ((answer.mantissa < minimal_nineteen_digit_integer) && (p != frac_end)) {
480+
answer.mantissa = answer.mantissa * 10 + uint64_t(*p - UC('0'));
477481
++p;
478482
}
479-
exponent = uint32_t(answer.fraction.ptr - p) + exp_number;
483+
answer.exponent += int16_t(answer.fraction.ptr - p);
480484
}
481-
// We have now corrected both exponent and i, to a truncated value
485+
// We have now corrected both exponent and mantissa, to a truncated value
482486
}
483487
}
484-
answer.exponent = exponent;
485-
answer.mantissa = i;
486488
return answer;
487489
}
488490

@@ -518,7 +520,6 @@ parse_int_string(UC const *p, UC const *pend, T &value,
518520

519521
UC const *const start_num = p;
520522

521-
// use SIMD here?
522523
while (p != pend && *p == UC('0')) {
523524
++p;
524525
}
@@ -541,7 +542,7 @@ parse_int_string(UC const *p, UC const *pend, T &value,
541542
p++;
542543
}
543544

544-
uint32_t const digit_count = uint32_t(p - start_digits);
545+
uint16_t const digit_count = uint16_t(p - start_digits);
545546

546547
if (digit_count == 0) {
547548
if (has_leading_zeros) {

0 commit comments

Comments
 (0)