66#include < fast_io_dsal/string.h>
77#include < charconv>
88#include < fast_float/fast_float.h>
9+ #include < limits>
910
1011using namespace fast_io ::io;
1112
13+ // NOTE:
14+ // This benchmark compares the core integer parsing routines under identical preconditions.
15+ // For each line, the pointer `p` is positioned at the first decimal digit; there is no
16+ // leading whitespace, sign character, or base prefix in the [p, end) slice.
17+ // The fast_io branch calls
18+ // scan_int_contiguous_none_simd_space_part_define_impl<10, char>(p, end, v);
19+ // the std::from_chars and fast_float::from_chars integer overloads are invoked on the same
20+ // [p, end) range.
21+ // By specification, std::from_chars and fast_float::from_chars for integer types do not
22+ // skip leading whitespace, and scan_int_contiguous_none_simd_space_part_define_impl makes
23+ // the same assumption that any preceding whitespace has already been consumed. Thus the
24+ // starting conditions and termination rules are fully aligned, providing a fair comparison
25+ // of "decimal digit substring → uint64_t" parsing performance.
26+
1227static std::string make_numbers_buffer (std::size_t n)
1328{
1429 std::string s;
@@ -26,6 +41,71 @@ static std::string make_numbers_buffer(std::size_t n)
2641 return s;
2742}
2843
/// Builds a newline-terminated list of `n` decimal numbers that all have exactly `digits` digits.
///
/// Values start at the smallest `digits`-digit number (0 for a single digit) and count upwards,
/// wrapping back to that start once every `digits`-digit value representable in std::uint64_t
/// has been used.
///
/// @param digits  Number of decimal digits per value; accepted range is 1..20.
/// @param n       Number of lines to generate.
/// @return "<value>\n" repeated `n` times, or an empty string when `digits` is outside 1..20.
static std::string make_fixed_digits_numbers_buffer(std::size_t digits, std::size_t n)
{
	constexpr std::size_t max_digits = 20; // decimal width of the largest std::uint64_t value
	constexpr std::uint64_t pow10[max_digits]{
		1ull,
		10ull,
		100ull,
		1000ull,
		10000ull,
		100000ull,
		1000000ull,
		10000000ull,
		100000000ull,
		1000000000ull,
		10000000000ull,
		100000000000ull,
		1000000000000ull,
		10000000000000ull,
		100000000000000ull,
		1000000000000000ull,
		10000000000000000ull,
		100000000000000000ull,
		1000000000000000000ull,
		10000000000000000000ull};

	if (digits == 0 || digits > max_digits)
	{
		return {};
	}

	// [lo, lo + count) is the set of values with exactly `digits` digits.
	std::uint64_t lo{};
	std::uint64_t count{};
	if (digits == 1)
	{
		// Single-digit values include 0, which has no power-of-ten lower bound.
		lo = 0;
		count = 10;
	}
	else if (digits < max_digits)
	{
		lo = pow10[digits - 1];
		count = pow10[digits] - lo;
	}
	else
	{
		// 10^20 does not fit in 64 bits, so the 20-digit range ends at the type's maximum.
		lo = pow10[max_digits - 1];
		count = (std::numeric_limits<std::uint64_t>::max)() - lo + 1;
	}

	std::string s;
	// Every line is exactly `digits` characters plus '\n'; the loop below never writes past
	// this reservation, so the buffer is allocated once.
	s.reserve(n * (digits + 1));

	// Scratch space for one line: at most 20 digits followed by '\n'.
	char line[max_digits + 1];
	for (std::size_t i{}; i != n; ++i)
	{
		std::uint64_t const value = lo + static_cast<std::uint64_t>(i % count);
		// Cannot fail: 20 characters hold any std::uint64_t in base 10.
		auto const res = std::to_chars(line, line + max_digits, value);
		*res.ptr = '\n';
		s.append(line, static_cast<std::size_t>(res.ptr - line) + 1);
	}
	return s;
}
108+
29109int main ()
30110{
31111 constexpr std::size_t N = 10'000'000 ;
@@ -39,7 +119,7 @@ int main()
39119 {
40120 lines += (*p == ' \n ' );
41121 }
42- fast_io::println (" lines=" , lines);
122+ fast_io::perrln (" lines=" , lines);
43123 }
44124
45125 // atoi
@@ -85,7 +165,60 @@ int main()
85165 }
86166
87167
88- // fast_io char_digit_to_literal
168+ // fast_io core sto (dec) - scalar/SWAR path:
169+ // scan_int_contiguous_none_simd_space_part_define_impl (no SSE4.1 fast path)
170+ {
171+ fast_io::timer t (u8" fastio_scan_int_none_simd_dec" );
172+ std::uint64_t sum{};
173+ char const *p = begin;
174+ while (p < end)
175+ {
176+ std::uint64_t v{};
177+ auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10 , char >(
178+ p, end, v);
179+ if (res.code != fast_io::parse_code::ok)
180+ {
181+ break ;
182+ }
183+ sum += v;
184+ p = res.iter ;
185+ if (p < end && *p == ' \n ' )
186+ {
187+ ++p;
188+ }
189+ }
190+ std::uint64_t volatile sink = sum;
191+ (void )sink;
192+ }
193+
194+ #if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64))
195+ // fast_io core sto (dec) - SSE4.1-accelerated path:
196+ // scan_int_contiguous_none_space_part_define_impl (may use sse_parse for base-10)
197+ {
198+ fast_io::timer t (u8" fastio_scan_int_sse4_dec" );
199+ std::uint64_t sum{};
200+ char const *p = begin;
201+ while (p < end)
202+ {
203+ std::uint64_t v{};
204+ auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10 >(p, end, v);
205+ if (res.code != fast_io::parse_code::ok)
206+ {
207+ break ;
208+ }
209+ sum += v;
210+ p = res.iter ;
211+ if (p < end && *p == ' \n ' )
212+ {
213+ ++p;
214+ }
215+ }
216+ std::uint64_t volatile sink = sum;
217+ (void )sink;
218+ }
219+ #endif
220+
221+ // fast_io char_digit_to_literal (hex)
89222 {
90223 fast_io::timer t (u8" fastio_char_digit_to_literal" );
91224 std::uint64_t sum{};
@@ -112,6 +245,7 @@ int main()
112245 (void )sink;
113246 }
114247
248+
115249 // fast_float
116250 {
117251 fast_io::timer t (u8" fast_float_from_chars" );
@@ -132,4 +266,165 @@ int main()
132266 (void )sink;
133267 }
134268
269+ // Per-digit decimal benchmarks: 1-digit up to theoretical max decimal digits of uint64_t (20)
270+ {
271+ constexpr std::size_t max_digits = 20 ;
272+ for (std::size_t digits = 1 ; digits <= max_digits; ++digits)
273+ {
274+ auto buf_fixed = make_fixed_digits_numbers_buffer (digits, N);
275+ char const *fixed_begin = buf_fixed.data ();
276+ char const *fixed_end = buf_fixed.data () + buf_fixed.size ();
277+
278+ fast_io::perrln (" \n\n fixed_digits=" , digits, " lines=" , N);
279+
280+ {
281+ std::size_t lines{};
282+ for (char const *p = fixed_begin; p < fixed_end; ++p)
283+ {
284+ lines += (*p == ' \n ' );
285+ }
286+ fast_io::perrln (" lines=" , lines);
287+ }
288+
289+ // atoi on fixed-width decimal substrings
290+ {
291+ fast_io::timer t (u8" atoi_fixed" );
292+ std::uint64_t sum{};
293+ char const *p = fixed_begin;
294+ while (p < fixed_end)
295+ {
296+ int v = std::atoi (p);
297+ sum += static_cast <std::uint64_t >(v);
298+ while (p < fixed_end && *p >= ' 0' && *p <= ' 9' )
299+ {
300+ ++p;
301+ }
302+ if (p < fixed_end && *p == ' \n ' )
303+ {
304+ ++p;
305+ }
306+ }
307+ std::uint64_t volatile sink = sum;
308+ (void )sink;
309+ }
310+
311+ // std::from_chars on fixed-width decimal substrings
312+ {
313+ fast_io::timer t (u8" std_from_chars_fixed" );
314+ std::uint64_t sum{};
315+ char const *p = fixed_begin;
316+ while (p < fixed_end)
317+ {
318+ std::uint64_t v{};
319+ auto res = std::from_chars (p, fixed_end, v);
320+ sum += v;
321+ p = res.ptr ;
322+ if (p < fixed_end && *p == ' \n ' )
323+ {
324+ ++p;
325+ }
326+ }
327+ std::uint64_t volatile sink = sum;
328+ (void )sink;
329+ }
330+
331+ // fast_io char_digit_to_literal on fixed-width decimal substrings
332+ {
333+ fast_io::timer t (u8" fastio_char_digit_to_literal_fixed" );
334+ std::uint64_t sum{};
335+ char const *p = fixed_begin;
336+ while (p < fixed_end)
337+ {
338+ using UCh = std::make_unsigned_t <char >;
339+ std::uint64_t v{};
340+ char const *q = p;
341+ while (q < fixed_end && *q != ' \n ' )
342+ {
343+ UCh ch = static_cast <UCh>(*q);
344+ if (fast_io::details::char_digit_to_literal<10 , char >(ch))
345+ {
346+ break ;
347+ }
348+ v = v * 10 + static_cast <std::uint64_t >(ch);
349+ ++q;
350+ }
351+ sum += v;
352+ p = (q < fixed_end ? q + 1 : q);
353+ }
354+ std::uint64_t volatile sink = sum;
355+ (void )sink;
356+ }
357+
358+ // fast_io core sto (dec) - scalar/SWAR path on fixed-width decimal substrings
359+ {
360+ fast_io::timer t (u8" fastio_scan_int_none_simd_dec_fixed" );
361+ std::uint64_t sum{};
362+ char const *p = fixed_begin;
363+ while (p < fixed_end)
364+ {
365+ std::uint64_t v{};
366+ auto res = ::fast_io::details::scan_int_contiguous_none_simd_space_part_define_impl<10 , char >(
367+ p, fixed_end, v);
368+ if (res.code != fast_io::parse_code::ok)
369+ {
370+ break ;
371+ }
372+ sum += v;
373+ p = res.iter ;
374+ if (p < fixed_end && *p == ' \n ' )
375+ {
376+ ++p;
377+ }
378+ }
379+ std::uint64_t volatile sink = sum;
380+ (void )sink;
381+ }
382+
383+ #if defined(__SSE4_1__) && (defined(__x86_64__) || defined(_M_AMD64))
384+ // fast_io core sto (dec) - SSE4.1-accelerated path on fixed-width decimal substrings
385+ {
386+ fast_io::timer t (u8" fastio_scan_int_sse4_dec_fixed" );
387+ std::uint64_t sum{};
388+ char const *p = fixed_begin;
389+ while (p < fixed_end)
390+ {
391+ std::uint64_t v{};
392+ auto res = ::fast_io::details::scan_int_contiguous_none_space_part_define_impl<10 >(p, fixed_end, v);
393+ if (res.code != fast_io::parse_code::ok)
394+ {
395+ break ;
396+ }
397+ sum += v;
398+ p = res.iter ;
399+ if (p < fixed_end && *p == ' \n ' )
400+ {
401+ ++p;
402+ }
403+ }
404+ std::uint64_t volatile sink = sum;
405+ (void )sink;
406+ }
407+ #endif
408+
409+ // fast_float integer from_chars on fixed-width decimal substrings
410+ {
411+ fast_io::timer t (u8" fast_float_from_chars_fixed" );
412+ std::uint64_t sum{};
413+ char const *p = fixed_begin;
414+ while (p < fixed_end)
415+ {
416+ std::uint64_t v{};
417+ auto res = fast_float::from_chars (p, fixed_end, v);
418+ sum += v;
419+ p = res.ptr ;
420+ if (p < fixed_end && *p == ' \n ' )
421+ {
422+ ++p;
423+ }
424+ }
425+ std::uint64_t volatile sink = sum;
426+ (void )sink;
427+ }
428+ }
429+ }
135430}
0 commit comments