@@ -620,6 +620,7 @@ if (!needs_escape)
620620
621621# Optimization #3 : Fast Integer serialization
622622
623+ (`std::to_chars`)
623624
624625```cpp
625626while (number >= 10 ) {
@@ -637,7 +638,7 @@ Writing from the end
637638
638639``` cpp
639640while (number >= 100 ) {
640- memcpy (write_pointer - 1, &internal:: decimal_table[ (pv % 100)* 2] , 2);
641+ memcpy (write_pointer - 1, &decimal_table[ (pv % 100)* 2] , 2);
641642 write_pointer -= 2;
642643 pv /= 100;
643644}
@@ -655,6 +656,11 @@ if(number >= 10) {
655656- Useful for quickly computing the number of digits
656657
657658``` cpp
// Integer log2 helper: converts x to uint64_t, sets its lowest bit, and
// returns 63 minus leading_zeroes() of that value.
// NOTE(review): leading_zeroes is not defined in this snippet; presumably it
// counts the leading zero bits of a 64-bit word, which would make the result
// floor(log2(x)) for x >= 1. Confirm against its definition.
659+ template <typename number_type>
660+ int int_log2 (number_type x) {
// The "| 1" keeps the argument nonzero, so x == 0 gives the same result as x == 1.
661+ return 63 - leading_zeroes(uint64_t(x) | 1);
662+ }
663+
658664int fast_digit_count_64(uint64_t x) {
659665 static uint64_t table[ ] = {9,
660666 99,
@@ -670,17 +676,41 @@ int fast_digit_count_64(uint64_t x) {
670676}
671677```
672678
673-
674679---
675680
676- # Does fast integer processing matter?
677-
678- Replace fast digit count by naive approach
681+ # Could use SIMD if we wanted to
679682
683+ **Don't try to understand:**
680684```cpp
681- std::to_string(value).length(); // Allocates string just to count!
// SIMD integer-to-digits sketch built on AVX-512 52-bit multiply-add intrinsics
// (_mm512_madd52lo_epu64 / _mm512_madd52hi_epu64) and a byte permute.
// Splits n into a quotient and remainder by 10^8, processes both halves in
// parallel 64-bit lanes, and returns the low 128 bits of the permuted result.
// NOTE(review): the names (n_15_08, n_07_00, digits_15_0) suggest the result is
// the 16 ASCII digits of n, assuming n < 10^16 - confirm. The ifma_const and
// permb_const values are elided ("...") on this slide, so the snippet is not
// compilable as shown.
685+ __m128i to_string_avx512ifma(uint64_t n) {
// High part: n / 10^8. Low part: n % 10^8.
686+ uint64_t n_15_08 = n / 100000000;
687+ uint64_t n_07_00 = n % 100000000;
// Broadcast each half to every 64-bit lane of a 512-bit register.
688+ __m512i bcstq_h = _mm512_set1_epi64(n_15_08);
689+ __m512i bcstq_l = _mm512_set1_epi64(n_07_00);
// Despite its name, zmmzero carries 0x1A1A400 in its lowest 64 bits; it is the
// addend of the madd52lo calls below. Only the low 128 bits are set here.
690+ __m512i zmmzero = _mm512_castsi128_si512(_mm_cvtsi64_si128(0x1A1A400));
691+ __m512i zmmTen = _mm512_set1_epi64(10);
692+ __m512i asciiZero = _mm512_set1_epi64('0');
// Per-lane multipliers and the byte-selection indices (values elided on the slide).
693+ __m512i ifma_const = _mm512_setr_epi64(0x00000000002af31dc, ...);
694+ __m512i permb_const = _mm512_castsi128_si512(_mm_set_epi8(0x78, ...));
// Per lane: zmmzero + low 52 bits of (broadcast value * ifma_const).
695+ __m512i lowbits_h = _mm512_madd52lo_epu64(zmmzero, bcstq_h, ifma_const);
696+ __m512i lowbits_l = _mm512_madd52lo_epu64(zmmzero, bcstq_l, ifma_const);
// Per lane: '0' + high 52 bits of (10 * lowbits).
697+ __m512i highbits_h = _mm512_madd52hi_epu64(asciiZero, zmmTen, lowbits_h);
698+ __m512i highbits_l = _mm512_madd52hi_epu64(asciiZero, zmmTen, lowbits_l);
// Select bytes from highbits_h and highbits_l according to permb_const.
699+ __m512i perm = _mm512_permutex2var_epi8(highbits_h, permb_const, highbits_l);
// Keep only the low 128 bits of the permuted vector.
700+ __m128i digits_15_0 = _mm512_castsi512_si128(perm);
701+ return digits_15_0;
702+ }
682703```
683704
705+ ---
706+
707+ # Does fast integer processing matter?
708+
709+ * Replace the fast digit count with a naive approach based on `std::to_string`:
710+ ``` cpp
711+ std::to_string (value).length();
712+ ```
713+ * The naive version is only 34% worse in one dataset.
684714
685715---
686716
0 commit comments