Skip to content

Commit fd9cad9

Browse files
authored
Merge pull request #359 from shikharish/uint16
optimize uint16 parsing
2 parents 42ae960 + b14e6a4 commit fd9cad9

File tree

3 files changed

+227
-1
lines changed

3 files changed

+227
-1
lines changed

benchmarks/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,22 @@ FetchContent_MakeAvailable(counters)
1111
# Benchmark executables, one per source file.
add_executable(realbenchmark benchmark.cpp)
add_executable(bench_ip bench_ip.cpp)
add_executable(bench_uint16 bench_uint16.cpp)

# Every benchmark gets the same setup: link the counters library, build as
# C++17, and link fast_float. The per-target command order matches the
# original hand-written sequence.
foreach(bench_target IN ITEMS realbenchmark bench_ip bench_uint16)
  target_link_libraries(${bench_target} PRIVATE counters::counters)
  set_property(
    TARGET ${bench_target}
    PROPERTY CXX_STANDARD 17)
  target_link_libraries(${bench_target} PUBLIC fast_float)
endforeach()
2430

2531
include(ExternalProject)
2632

benchmarks/bench_uint16.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#include "counters/bench.h"
2+
#include "fast_float/fast_float.h"
3+
#include <charconv>
4+
#include <cstdint>
5+
#include <cstdio>
6+
#include <cstdlib>
7+
#include <cstring>
8+
#include <random>
9+
#include <atomic>
10+
#include <string>
11+
#include <vector>
12+
13+
void pretty_print(size_t volume, size_t bytes, std::string name,
14+
counters::event_aggregate agg) {
15+
if (agg.inner_count > 1) {
16+
printf("# (inner count: %d)\n", agg.inner_count);
17+
}
18+
printf("%-40s : ", name.c_str());
19+
printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns());
20+
printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns());
21+
printf(" %5.2f ns/ip ", agg.fastest_elapsed_ns() / volume);
22+
if (counters::event_collector().has_events()) {
23+
printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns());
24+
printf(" %5.2f c/ip ", agg.fastest_cycles() / volume);
25+
printf(" %5.2f i/ip ", agg.fastest_instructions() / volume);
26+
printf(" %5.2f c/b ", agg.fastest_cycles() / bytes);
27+
printf(" %5.2f i/b ", agg.fastest_instructions() / bytes);
28+
printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles());
29+
}
30+
printf("\n");
31+
}
32+
33+
enum class parse_method { standard, fast_float };
34+
35+
void validate(const std::string &buffer, const std::vector<uint16_t> &expected,
36+
char delimiter) {
37+
const char *p = buffer.data();
38+
const char *pend = p + buffer.size();
39+
40+
for (size_t i = 0; i < expected.size(); i++) {
41+
uint16_t val;
42+
auto r = fast_float::from_chars(p, pend, val);
43+
if (r.ec != std::errc() || val != expected[i]) {
44+
printf("Validation failed at index %zu: expected %u, got %u\n", i,
45+
expected[i], val);
46+
std::abort();
47+
}
48+
p = r.ptr;
49+
if (i + 1 < expected.size()) {
50+
if (p >= pend || *p != delimiter) {
51+
printf("Validation failed at index %zu: delimiter mismatch\n", i);
52+
std::abort();
53+
}
54+
++p;
55+
}
56+
}
57+
58+
if (p != pend) {
59+
printf("Validation failed: trailing bytes remain\n");
60+
std::abort();
61+
}
62+
printf("Validation passed!\n");
63+
}
64+
65+
int main() {
66+
constexpr size_t N = 500000;
67+
constexpr char delimiter = ',';
68+
std::mt19937 rng(1234);
69+
std::uniform_int_distribution<int> dist(0, 65535);
70+
71+
std::vector<uint16_t> expected;
72+
expected.reserve(N);
73+
74+
std::string buffer;
75+
buffer.reserve(N * 6); // up to 5 digits + delimiter
76+
77+
for (size_t i = 0; i < N; ++i) {
78+
uint16_t val = (uint16_t)dist(rng);
79+
expected.push_back(val);
80+
std::string s = std::to_string(val);
81+
buffer.append(s);
82+
if (i + 1 < N) {
83+
buffer.push_back(delimiter);
84+
}
85+
}
86+
87+
size_t total_bytes = buffer.size();
88+
89+
validate(buffer, expected, delimiter);
90+
91+
volatile uint64_t sink = 0;
92+
93+
pretty_print(N, total_bytes, "parse_uint16_std_fromchars",
94+
counters::bench([&]() {
95+
uint64_t sum = 0;
96+
const char *p = buffer.data();
97+
const char *pend = p + buffer.size();
98+
for (size_t i = 0; i < N; ++i) {
99+
uint16_t value = 0;
100+
auto r = std::from_chars(p, pend, value);
101+
if (r.ec != std::errc())
102+
std::abort();
103+
sum += value;
104+
p = r.ptr;
105+
if (i + 1 < N) {
106+
if (p >= pend || *p != delimiter)
107+
std::abort();
108+
++p;
109+
}
110+
}
111+
if (p != pend)
112+
std::abort();
113+
sink += sum;
114+
}));
115+
116+
pretty_print(N, total_bytes, "parse_uint16_fastfloat", counters::bench([&]() {
117+
uint64_t sum = 0;
118+
const char *p = buffer.data();
119+
const char *pend = p + buffer.size();
120+
for (size_t i = 0; i < N; ++i) {
121+
uint16_t value = 0;
122+
auto r = fast_float::from_chars(p, pend, value);
123+
if (r.ec != std::errc())
124+
std::abort();
125+
sum += value;
126+
p = r.ptr;
127+
if (i + 1 < N) {
128+
if (p >= pend || *p != delimiter)
129+
std::abort();
130+
++p;
131+
}
132+
}
133+
if (p != pend)
134+
std::abort();
135+
sink += sum;
136+
}));
137+
138+
return EXIT_SUCCESS;
139+
}

include/fast_float/ascii_number.h

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
3232
// able to optimize it well.
3333
template <typename UC>
3434
fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
35-
return !(c > UC('9') || c < UC('0'));
35+
return (unsigned)(c - UC('0')) <= 9u;
3636
}
3737

3838
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
@@ -68,6 +68,25 @@ read8_to_u64(UC const *chars) {
6868
return val;
6969
}
7070

71+
// Read 4 UC into a u32. Truncates UC if not char.
72+
template <typename UC>
73+
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
74+
read4_to_u32(UC const *chars) {
75+
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
76+
uint32_t val = 0;
77+
for (int i = 0; i < 4; ++i) {
78+
val |= uint32_t(uint8_t(*chars)) << (i * 8);
79+
++chars;
80+
}
81+
return val;
82+
}
83+
uint32_t val;
84+
::memcpy(&val, chars, sizeof(uint32_t));
85+
#if FASTFLOAT_IS_BIG_ENDIAN == 1
86+
val = byteswap_32(val);
87+
#endif
88+
return val;
89+
}
7190
#ifdef FASTFLOAT_SSE2
7291

7392
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
@@ -149,6 +168,18 @@ is_made_of_eight_digits_fast(uint64_t val) noexcept {
149168
0x8080808080808080));
150169
}
151170

171+
// Returns true when all four bytes of `val` are ASCII digits (0x30..0x39).
// Adding 0x46 sets a byte's top bit when that byte is above '9', and
// subtracting 0x30 sets it when the byte is below '0'; the word is accepted
// only if no top bit ends up set.
// Kept as a single return statement so it stays a valid C++11 constexpr
// function.
fastfloat_really_inline constexpr bool
is_made_of_four_digits_fast(uint32_t val) noexcept {
  return (((val + 0x46464646) | (val - 0x30303030)) & 0x80808080) == 0;
}
175+
176+
// Converts four ASCII digits packed in `val` (first digit in the least
// significant byte) to their numeric value, 0..9999. The result is only
// meaningful when every byte of `val` is an ASCII digit.
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
parse_four_digits_unrolled(uint32_t val) noexcept {
  // Bytes now hold the digit values d0, d1, d2, d3.
  uint32_t const digits = val - 0x30303030;
  // Byte 0 becomes 10*d0 + d1 and byte 2 becomes 10*d2 + d3.
  uint32_t const pairs = (digits * 10) + (digits >> 8);
  // Keep the two two-digit lanes, then 100 * low lane + high lane lands in
  // the upper 16 bits of the product.
  uint32_t const lanes = pairs & 0x00FF00FF;
  return ((lanes * 0x00640001) >> 16) & 0xFFFF;
}
182+
152183
#ifdef FASTFLOAT_HAS_SIMD
153184

154185
// Call this if chars might not be 8 digits.
@@ -606,6 +637,56 @@ parse_int_string(UC const *p, UC const *pend, T &value,
606637
}
607638
}
608639

640+
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) {
641+
if (base == 10) {
642+
const size_t len = size_t(pend - p);
643+
if (len == 0) {
644+
if (has_leading_zeros) {
645+
value = 0;
646+
answer.ec = std::errc();
647+
answer.ptr = p;
648+
} else {
649+
answer.ec = std::errc::invalid_argument;
650+
answer.ptr = first;
651+
}
652+
return answer;
653+
}
654+
655+
if (len >= 4) {
656+
uint32_t digits = read4_to_u32(p);
657+
if (is_made_of_four_digits_fast(digits)) {
658+
uint32_t v = parse_four_digits_unrolled(digits);
659+
if (len >= 5 && is_integer(p[4])) {
660+
v = v * 10 + uint32_t(p[4] - '0');
661+
if (len >= 6 && is_integer(p[5])) {
662+
answer.ec = std::errc::result_out_of_range;
663+
const UC *q = p + 5;
664+
while (q != pend && is_integer(*q)) {
665+
q++;
666+
}
667+
answer.ptr = q;
668+
return answer;
669+
}
670+
if (v > 65535) {
671+
answer.ec = std::errc::result_out_of_range;
672+
answer.ptr = p + 5;
673+
return answer;
674+
}
675+
value = uint16_t(v);
676+
answer.ec = std::errc();
677+
answer.ptr = p + 5;
678+
return answer;
679+
}
680+
// 4 digits
681+
value = uint16_t(v);
682+
answer.ec = std::errc();
683+
answer.ptr = p + 4;
684+
return answer;
685+
}
686+
}
687+
}
688+
}
689+
609690
uint64_t i = 0;
610691
if (base == 10) {
611692
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible

0 commit comments

Comments
 (0)