Skip to content

Commit d6eda4d

Browse files
authored
Merge pull request #1236 from puji4810/bench
Add benchmark for hex string conversion performance
2 parents c64c696 + 1aa9730 commit d6eda4d

File tree

3 files changed

+702
-3
lines changed

3 files changed

+702
-3
lines changed

benchmark/0022.from_chars/CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ FetchContent_MakeAvailable(fast_float)
1010

1111
add_executable(benchmark.0022.from_chars ${CMAKE_CURRENT_LIST_DIR}/atoi_vs_from_chars.cc)
1212
target_include_directories(benchmark.0022.from_chars PRIVATE ${CMAKE_SOURCE_DIR}/include)
13-
1413
target_compile_features(benchmark.0022.from_chars PRIVATE cxx_std_20)
1514

1615
# Prefer official target if provided by fast_float; otherwise include headers directly
@@ -22,3 +21,16 @@ else()
2221
target_include_directories(benchmark.0022.from_chars PRIVATE ${fast_float_SOURCE_DIR}/include)
2322
endif()
2423
endif()
24+
25+
add_executable(benchmark.0022.from_chars_hex ${CMAKE_CURRENT_LIST_DIR}/atoi_vs_from_chars_hex.cc)
26+
target_include_directories(benchmark.0022.from_chars_hex PRIVATE ${CMAKE_SOURCE_DIR}/include)
27+
target_compile_features(benchmark.0022.from_chars_hex PRIVATE cxx_std_20)
28+
29+
if (TARGET fast_float::fast_float)
30+
target_link_libraries(benchmark.0022.from_chars_hex PRIVATE fast_float::fast_float)
31+
else()
32+
FetchContent_GetProperties(fast_float)
33+
if (fast_float_SOURCE_DIR)
34+
target_include_directories(benchmark.0022.from_chars_hex PRIVATE ${fast_float_SOURCE_DIR}/include)
35+
endif()
36+
endif()

benchmark/0022.from_chars/atoi_vs_from_chars.cc

Lines changed: 297 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,24 @@
66
#include <fast_io_dsal/string.h>
77
#include <charconv>
88
#include <fast_float/fast_float.h>
9+
#include <limits>
910

1011
using namespace fast_io::io;
1112

13+
// NOTE:
14+
// This benchmark compares the core integer parsing routines under identical preconditions.
15+
// For each line, the pointer `p` is positioned at the first decimal digit; there is no
16+
// leading whitespace, sign character, or base prefix in the [p, end) slice.
17+
// The fast_io branch calls
18+
// scan_int_contiguous_none_simd_space_part_define_impl<10, char>(p, end, v);
19+
// the std::from_chars and fast_float::from_chars integer overloads are invoked on the same
20+
// [p, end) range.
21+
// By specification, std::from_chars and fast_float::from_chars for integer types do not
22+
// skip leading whitespace, and scan_int_contiguous_none_simd_space_part_define_impl makes
23+
// the same assumption that any preceding whitespace has already been consumed. Thus the
24+
// starting conditions and termination rules are fully aligned, providing a fair comparison
25+
// of "decimal digit substring → uint64_t" parsing performance.
26+
1227
static std::string make_numbers_buffer(std::size_t n)
1328
{
1429
std::string s;
@@ -26,6 +41,71 @@ static std::string make_numbers_buffer(std::size_t n)
2641
return s;
2742
}
2843

44+
/// Builds a benchmark input buffer of `n` lines, each holding one unsigned
/// decimal number that is exactly `digits` characters wide, followed by '\n'.
///
/// The values cycle through the range of numbers having that width:
///   - digits == 1      : 0 .. 9
///   - 2 <= digits < 20 : 10^(digits-1) .. 10^digits - 1
///   - digits == 20     : 10^19 .. UINT64_MAX
/// Line `i` holds `lo + (i % count)`, where `lo` is the smallest value of the
/// width and `count` is how many values of that width fit in std::uint64_t.
///
/// @param digits Decimal width of every number; must be in [1, 20].
/// @param n      Number of lines to generate.
/// @return The generated buffer (`n * (digits + 1)` bytes), or an empty string
///         when `digits` is 0 or greater than 20.
static std::string make_fixed_digits_numbers_buffer(std::size_t digits, std::size_t n)
{
	// UINT64_MAX (18446744073709551615) has 20 decimal digits.
	constexpr std::size_t max_digits = 20;
	constexpr std::uint64_t powers_of_ten[max_digits]{
		1ull,
		10ull,
		100ull,
		1000ull,
		10000ull,
		100000ull,
		1000000ull,
		10000000ull,
		100000000ull,
		1000000000ull,
		10000000000ull,
		100000000000ull,
		1000000000000ull,
		10000000000000ull,
		100000000000000ull,
		1000000000000000ull,
		10000000000000000ull,
		100000000000000000ull,
		1000000000000000000ull,
		10000000000000000000ull};

	if (digits == 0 || digits > max_digits)
	{
		return {};
	}

	std::uint64_t lo{};
	std::uint64_t count{};

	if (digits == 1)
	{
		// Single-digit numbers include 0, so the range is 0..9 rather than 1..9.
		lo = 0;
		count = 10;
	}
	else if (digits < max_digits)
	{
		lo = powers_of_ten[digits - 1];
		count = powers_of_ten[digits] - lo;
	}
	else
	{
		// 10^20 does not fit in 64 bits: the 20-digit range ends at UINT64_MAX.
		lo = powers_of_ten[max_digits - 1];
		count = (std::numeric_limits<std::uint64_t>::max)() - lo + 1;
	}

	std::string s;
	// Every line is exactly `digits` characters plus the newline, so this
	// reservation is exact and the appends below never reallocate.
	s.reserve(n * (digits + 1));

	// Format into a small stack buffer and append only the bytes produced,
	// instead of over-growing the string by a scratch area and shrinking it
	// back (which zero-fills on every iteration and can outgrow the reserve).
	char line[max_digits + 1];
	for (std::size_t i{}; i != n; ++i)
	{
		std::uint64_t const value = lo + static_cast<std::uint64_t>(i) % count;
		// `max_digits` bytes always suffice for a uint64_t, so to_chars cannot fail here.
		auto const res = std::to_chars(line, line + max_digits, value);
		*res.ptr = '\n';
		s.append(line, static_cast<std::size_t>(res.ptr - line) + 1);
	}
	return s;
}
108+
29109
int main()
30110
{
31111
constexpr std::size_t N = 10'000'000;
@@ -39,7 +119,7 @@ int main()
39119
{
40120
lines += (*p == '\n');
41121
}
42-
fast_io::println("lines=", lines);
122+
fast_io::perrln("lines=", lines);
43123
}
44124

45125
// atoi
@@ -85,7 +165,60 @@ int main()
85165
}
86166

87167

88-
// fast_io char_digit_to_literal
168+
// fast_io core sto (dec) - scalar/SWAR path:
169+
// scan_int_contiguous_none_simd_space_part_define_impl (no SSE4.1 fast path)
170+
{
171+
fast_io::timer t(u8"fastio_scan_int_none_simd_dec");
172+
std::uint64_t sum{};
173+
char const *p = begin;
174+
while (p < end)
175+
{
176+
std::uint64_t v{};
177+
auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10, char>(
178+
p, end, v);
179+
if (res.code != fast_io::parse_code::ok)
180+
{
181+
break;
182+
}
183+
sum += v;
184+
p = res.iter;
185+
if (p < end && *p == '\n')
186+
{
187+
++p;
188+
}
189+
}
190+
std::uint64_t volatile sink = sum;
191+
(void)sink;
192+
}
193+
194+
#if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64))
195+
// fast_io core sto (dec) - SSE4.1-accelerated path:
196+
// scan_int_contiguous_none_space_part_define_impl (may use sse_parse for base-10)
197+
{
198+
fast_io::timer t(u8"fastio_scan_int_sse4_dec");
199+
std::uint64_t sum{};
200+
char const *p = begin;
201+
while (p < end)
202+
{
203+
std::uint64_t v{};
204+
auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10>(p, end, v);
205+
if (res.code != fast_io::parse_code::ok)
206+
{
207+
break;
208+
}
209+
sum += v;
210+
p = res.iter;
211+
if (p < end && *p == '\n')
212+
{
213+
++p;
214+
}
215+
}
216+
std::uint64_t volatile sink = sum;
217+
(void)sink;
218+
}
219+
#endif
220+
221+
// fast_io char_digit_to_literal (hex)
89222
{
90223
fast_io::timer t(u8"fastio_char_digit_to_literal");
91224
std::uint64_t sum{};
@@ -112,6 +245,7 @@ int main()
112245
(void)sink;
113246
}
114247

248+
115249
// fast_float
116250
{
117251
fast_io::timer t(u8"fast_float_from_chars");
@@ -132,4 +266,165 @@ int main()
132266
(void)sink;
133267
}
134268

269+
// Per-digit decimal benchmarks: 1-digit up to theoretical max decimal digits of uint64_t (20)
270+
{
271+
constexpr std::size_t max_digits = 20;
272+
for (std::size_t digits = 1; digits <= max_digits; ++digits)
273+
{
274+
auto buf_fixed = make_fixed_digits_numbers_buffer(digits, N);
275+
char const *fixed_begin = buf_fixed.data();
276+
char const *fixed_end = buf_fixed.data() + buf_fixed.size();
277+
278+
fast_io::perrln("\n\nfixed_digits=", digits, " lines=", N);
279+
280+
{
281+
std::size_t lines{};
282+
for (char const *p = fixed_begin; p < fixed_end; ++p)
283+
{
284+
lines += (*p == '\n');
285+
}
286+
fast_io::perrln("lines=", lines);
287+
}
288+
289+
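// atoi on fixed-width decimal substrings.
// NOTE: std::atoi returns int, so once the values no longer fit in int (11+ digits with a 32-bit int) the result is unrepresentable and the call has undefined behavior; timings for those widths are not comparable with the uint64_t parsers below.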
290+
{
291+
fast_io::timer t(u8"atoi_fixed");
292+
std::uint64_t sum{};
293+
char const *p = fixed_begin;
294+
while (p < fixed_end)
295+
{
296+
int v = std::atoi(p);
297+
sum += static_cast<std::uint64_t>(v);
298+
while (p < fixed_end && *p >= '0' && *p <= '9')
299+
{
300+
++p;
301+
}
302+
if (p < fixed_end && *p == '\n')
303+
{
304+
++p;
305+
}
306+
}
307+
std::uint64_t volatile sink = sum;
308+
(void)sink;
309+
}
310+
311+
// std::from_chars on fixed-width decimal substrings
312+
{
313+
fast_io::timer t(u8"std_from_chars_fixed");
314+
std::uint64_t sum{};
315+
char const *p = fixed_begin;
316+
while (p < fixed_end)
317+
{
318+
std::uint64_t v{};
319+
auto res = std::from_chars(p, fixed_end, v);
320+
sum += v;
321+
p = res.ptr;
322+
if (p < fixed_end && *p == '\n')
323+
{
324+
++p;
325+
}
326+
}
327+
std::uint64_t volatile sink = sum;
328+
(void)sink;
329+
}
330+
331+
// fast_io char_digit_to_literal on fixed-width decimal substrings
332+
{
333+
fast_io::timer t(u8"fastio_char_digit_to_literal_fixed");
334+
std::uint64_t sum{};
335+
char const *p = fixed_begin;
336+
while (p < fixed_end)
337+
{
338+
using UCh = std::make_unsigned_t<char>;
339+
std::uint64_t v{};
340+
char const *q = p;
341+
while (q < fixed_end && *q != '\n')
342+
{
343+
UCh ch = static_cast<UCh>(*q);
344+
if (fast_io::details::char_digit_to_literal<10, char>(ch))
345+
{
346+
break;
347+
}
348+
v = v * 10 + static_cast<std::uint64_t>(ch);
349+
++q;
350+
}
351+
sum += v;
352+
p = (q < fixed_end ? q + 1 : q);
353+
}
354+
std::uint64_t volatile sink = sum;
355+
(void)sink;
356+
}
357+
358+
// fast_io core sto (dec) - scalar/SWAR path on fixed-width decimal substrings
359+
{
360+
fast_io::timer t(u8"fastio_scan_int_none_simd_dec_fixed");
361+
std::uint64_t sum{};
362+
char const *p = fixed_begin;
363+
while (p < fixed_end)
364+
{
365+
std::uint64_t v{};
366+
auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10, char>(
367+
p, fixed_end, v);
368+
if (res.code != fast_io::parse_code::ok)
369+
{
370+
break;
371+
}
372+
sum += v;
373+
p = res.iter;
374+
if (p < fixed_end && *p == '\n')
375+
{
376+
++p;
377+
}
378+
}
379+
std::uint64_t volatile sink = sum;
380+
(void)sink;
381+
}
382+
383+
#if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64))
384+
// fast_io core sto (dec) - SSE4.1-accelerated path on fixed-width decimal substrings
385+
{
386+
fast_io::timer t(u8"fastio_scan_int_sse4_dec_fixed");
387+
std::uint64_t sum{};
388+
char const *p = fixed_begin;
389+
while (p < fixed_end)
390+
{
391+
std::uint64_t v{};
392+
auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10>(p, fixed_end, v);
393+
if (res.code != fast_io::parse_code::ok)
394+
{
395+
break;
396+
}
397+
sum += v;
398+
p = res.iter;
399+
if (p < fixed_end && *p == '\n')
400+
{
401+
++p;
402+
}
403+
}
404+
std::uint64_t volatile sink = sum;
405+
(void)sink;
406+
}
407+
#endif
408+
409+
// fast_float integer from_chars on fixed-width decimal substrings
410+
{
411+
fast_io::timer t(u8"fast_float_from_chars_fixed");
412+
std::uint64_t sum{};
413+
char const *p = fixed_begin;
414+
while (p < fixed_end)
415+
{
416+
std::uint64_t v{};
417+
auto res = fast_float::from_chars(p, fixed_end, v);
418+
sum += v;
419+
p = res.ptr;
420+
if (p < fixed_end && *p == '\n')
421+
{
422+
++p;
423+
}
424+
}
425+
std::uint64_t volatile sink = sum;
426+
(void)sink;
427+
}
428+
}
429+
}
135430
}

0 commit comments

Comments
 (0)