Skip to content

Commit 2dd43ec

Browse files
committed
optimize uint16 parsing
Signed-off-by: Shikhar <[email protected]>
1 parent 20e27e5 commit 2dd43ec

File tree

1 file changed

+116
-0
lines changed

1 file changed

+116
-0
lines changed

include/fast_float/ascii_number.h

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,26 @@ read8_to_u64(UC const *chars) {
6868
return val;
6969
}
7070

71+
// Read 4 UC into a u32. Truncates UC if not char.
72+
template <typename UC>
73+
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
74+
read4_to_u32(UC const *chars) {
75+
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
76+
uint32_t val = 0;
77+
for (int i = 0; i < 4; ++i) {
78+
val |= uint32_t(uint8_t(*chars)) << (i * 8);
79+
++chars;
80+
}
81+
return val;
82+
}
83+
uint32_t val;
84+
::memcpy(&val, chars, sizeof(uint32_t));
85+
#if FASTFLOAT_IS_BIG_ENDIAN == 1
86+
val = byteswap(val);
87+
#endif
88+
return val;
89+
}
90+
7191
#ifdef FASTFLOAT_SSE2
7292

7393
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
@@ -149,6 +169,13 @@ is_made_of_eight_digits_fast(uint64_t val) noexcept {
149169
0x8080808080808080));
150170
}
151171

172+
// Converts four ASCII decimal digits packed into a 32-bit word (first digit
// in the lowest byte) into their numeric value. When every byte of val is in
// '0'..'9' the result is in 0..9999; other inputs yield an unspecified number,
// so callers are expected to validate or pad the bytes beforehand.
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
parse_four_digits_unrolled(uint32_t val) noexcept {
  // Strip the ASCII bias: each byte now holds a digit value d0..d3.
  const uint32_t digit_bytes = val - 0x30303030;
  // Byte k becomes 10 * d[k] + d[k + 1]; bytes 0 and 2 are the two-digit
  // pairs (d0 d1) and (d2 d3). No byte exceeds 99, so nothing carries over.
  const uint32_t pair_sums = (digit_bytes * 10) + (digit_bytes >> 8);
  const uint32_t pairs = pair_sums & 0x00FF00FF;
  // Multiplying by (100 << 16) | 1 leaves 100 * (d0 d1) + (d2 d3) in the
  // upper half-word; shift it down and discard anything above 16 bits.
  const uint32_t combined = pairs * 0x00640001;
  return (combined >> 16) & 0xFFFF;
}
178+
152179
#ifdef FASTFLOAT_HAS_SIMD
153180

154181
// Call this if chars might not be 8 digits.
@@ -606,6 +633,95 @@ parse_int_string(UC const *p, UC const *pend, T &value,
606633
}
607634
}
608635

636+
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) {
637+
if (base == 10) {
638+
const size_t len = size_t(pend - p);
639+
if (len == 0) {
640+
if (has_leading_zeros) {
641+
value = 0;
642+
answer.ec = std::errc();
643+
answer.ptr = p;
644+
} else {
645+
answer.ec = std::errc::invalid_argument;
646+
answer.ptr = first;
647+
}
648+
return answer;
649+
}
650+
651+
uint32_t digits;
652+
if (len >= 4) {
653+
digits = read4_to_u32(p);
654+
} else {
655+
uint32_t b0 = uint32_t(uint8_t(p[0]));
656+
uint32_t b1 = (len > 1) ? uint32_t(uint8_t(p[1])) : 0xFFu;
657+
uint32_t b2 = (len > 2) ? uint32_t(uint8_t(p[2])) : 0xFFu;
658+
digits = b0 | (b1 << 8) | (b2 << 16) | (0xFFu << 24);
659+
}
660+
661+
uint32_t magic =
662+
((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
663+
uint32_t nd = (magic == 0) ? 4u : (uint32_t(countr_zero_32(magic)) >> 3);
664+
665+
if (nd == 0) {
666+
if (has_leading_zeros) {
667+
value = 0;
668+
answer.ec = std::errc();
669+
answer.ptr = p;
670+
return answer;
671+
}
672+
answer.ec = std::errc::invalid_argument;
673+
answer.ptr = first;
674+
return answer;
675+
}
676+
677+
if (nd < 4) {
678+
// mask out non-digit bytes and replace with '0' (0x30)
679+
uint32_t mask = 0xFFFFFFFFu >> ((4u - nd) * 8u);
680+
uint32_t padded = (digits & mask) | (~mask & 0x30303030u);
681+
uint32_t v = parse_four_digits_unrolled(padded);
682+
static constexpr uint32_t divs[] = {0, 1000, 100, 10};
683+
value = (uint16_t)(v / divs[nd]);
684+
answer.ec = std::errc();
685+
answer.ptr = p + nd;
686+
return answer;
687+
}
688+
689+
uint32_t v = parse_four_digits_unrolled(digits);
690+
691+
uint32_t d4 = (len > 4) ? uint32_t(p[4] - '0') : 10u;
692+
if (d4 > 9u) {
693+
value = (uint16_t)v;
694+
answer.ec = std::errc();
695+
answer.ptr = p + 4;
696+
return answer;
697+
}
698+
699+
if (len > 5) {
700+
uint32_t d5 = uint32_t(p[5]) - uint32_t('0');
701+
if (d5 <= 9u) {
702+
const UC *q = p + 6;
703+
while (q < pend && uint32_t(*q) - uint32_t('0') <= 9u)
704+
++q;
705+
answer.ec = std::errc::result_out_of_range;
706+
answer.ptr = q;
707+
return answer;
708+
}
709+
}
710+
711+
// overflow check
712+
if (v > 6553u || (v == 6553u && d4 > 5u)) {
713+
answer.ec = std::errc::result_out_of_range;
714+
answer.ptr = p + 5;
715+
return answer;
716+
}
717+
718+
value = (uint16_t)(v * 10u + d4);
719+
answer.ec = std::errc();
720+
answer.ptr = p + 5;
721+
return answer;
722+
}
723+
}
724+
609725
uint64_t i = 0;
610726
if (base == 10) {
611727
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible

0 commit comments

Comments
 (0)