Skip to content

Commit e6df365

Browse files
committed
simpler optimizations
Signed-off-by: Shikhar <[email protected]>
1 parent 9d2217d commit e6df365

File tree

1 file changed

+34 / -69 lines changed

1 file changed

+34 / -69 lines changed

include/fast_float/ascii_number.h

Lines changed: 34 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
3232
// able to optimize it well.
3333
template <typename UC>
3434
fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
35-
return !(c > UC('9') || c < UC('0'));
35+
return (unsigned)(c - UC('0')) <= 9u;
3636
}
3737

3838
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@@ -83,11 +83,10 @@ read4_to_u32(UC const *chars) {
8383
uint32_t val;
8484
::memcpy(&val, chars, sizeof(uint32_t));
8585
#if FASTFLOAT_IS_BIG_ENDIAN == 1
86-
val = byteswap(val);
86+
val = byteswap_32(val);
8787
#endif
8888
return val;
8989
}
90-
9190
#ifdef FASTFLOAT_SSE2
9291

9392
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
@@ -169,6 +168,11 @@ is_made_of_eight_digits_fast(uint64_t val) noexcept {
169168
0x8080808080808080));
170169
}
171170

171+
fastfloat_really_inline constexpr bool
172+
is_made_of_four_digits_fast(uint32_t val) noexcept {
173+
return !((((val + 0x46464646) | (val - 0x30303030)) & 0x80808080));
174+
}
175+
172176
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
173177
parse_four_digits_unrolled(uint32_t val) noexcept {
174178
val -= 0x30303030;
@@ -648,77 +652,38 @@ parse_int_string(UC const *p, UC const *pend, T &value,
648652
return answer;
649653
}
650654

651-
uint32_t digits;
652655
if (len >= 4) {
653-
digits = read4_to_u32(p);
654-
} else {
655-
uint32_t b0 = uint32_t(uint8_t(p[0]));
656-
uint32_t b1 = (len > 1) ? uint32_t(uint8_t(p[1])) : 0xFFu;
657-
uint32_t b2 = (len > 2) ? uint32_t(uint8_t(p[2])) : 0xFFu;
658-
digits = b0 | (b1 << 8) | (b2 << 16) | (0xFFu << 24);
659-
}
660-
661-
uint32_t magic =
662-
((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
663-
uint32_t nd = (magic == 0) ? 4u : (uint32_t(countr_zero_32(magic)) >> 3);
664-
665-
if (nd == 0) {
666-
if (has_leading_zeros) {
667-
value = 0;
656+
uint32_t digits = read4_to_u32(p);
657+
if (is_made_of_four_digits_fast(digits)) {
658+
uint32_t v = parse_four_digits_unrolled(digits);
659+
if (len >= 5 && is_integer(p[4])) {
660+
v = v * 10 + uint32_t(p[4] - '0');
661+
if (len >= 6 && is_integer(p[5])) {
662+
answer.ec = std::errc::result_out_of_range;
663+
const UC *q = p + 5;
664+
while (q != pend && is_integer(*q)) {
665+
q++;
666+
}
667+
answer.ptr = q;
668+
return answer;
669+
}
670+
if (v > 65535) {
671+
answer.ec = std::errc::result_out_of_range;
672+
answer.ptr = p + 5;
673+
return answer;
674+
}
675+
value = uint16_t(v);
676+
answer.ec = std::errc();
677+
answer.ptr = p + 5;
678+
return answer;
679+
}
680+
// 4 digits
681+
value = uint16_t(v);
668682
answer.ec = std::errc();
669-
answer.ptr = p;
670-
return answer;
671-
}
672-
answer.ec = std::errc::invalid_argument;
673-
answer.ptr = first;
674-
return answer;
675-
}
676-
677-
if (nd < 4) {
678-
// mask out non-digit bytes and replace with '0' (0x30)
679-
uint32_t mask = 0xFFFFFFFFu >> ((4u - nd) * 8u);
680-
uint32_t padded = (digits & mask) | (~mask & 0x30303030u);
681-
uint32_t v = parse_four_digits_unrolled(padded);
682-
constexpr uint32_t divs[] = {0, 1000, 100, 10};
683-
value = (uint16_t)(v / divs[nd]);
684-
answer.ec = std::errc();
685-
answer.ptr = p + nd;
686-
return answer;
687-
}
688-
689-
uint32_t v = parse_four_digits_unrolled(digits);
690-
691-
uint32_t d4 = (len > 4) ? uint32_t(p[4] - '0') : 10u;
692-
if (d4 > 9u) {
693-
value = (uint16_t)v;
694-
answer.ec = std::errc();
695-
answer.ptr = p + 4;
696-
return answer;
697-
}
698-
699-
if (len > 5) {
700-
uint32_t d5 = uint32_t(p[5]) - uint32_t('0');
701-
if (d5 <= 9u) {
702-
const UC *q = p + 6;
703-
while (q < pend && uint32_t(*q) - uint32_t('0') <= 9u)
704-
++q;
705-
answer.ec = std::errc::result_out_of_range;
706-
answer.ptr = q;
683+
answer.ptr = p + 4;
707684
return answer;
708685
}
709686
}
710-
711-
// overflow check
712-
if (v > 6553u || (v == 6553u && d4 > 5u)) {
713-
answer.ec = std::errc::result_out_of_range;
714-
answer.ptr = p + 5;
715-
return answer;
716-
}
717-
718-
value = (uint16_t)(v * 10u + d4);
719-
answer.ec = std::errc();
720-
answer.ptr = p + 5;
721-
return answer;
722687
}
723688
}
724689

0 commit comments

Comments
 (0)