Skip to content

Commit b1ddc5e

Browse files
committed
more slide work
1 parent aaef69e commit b1ddc5e

File tree

1 file changed

+36
-6
lines changed

1 file changed

+36
-6
lines changed

cppcon2025/cppcon_2025_slides.md

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ if (!needs_escape)
620620

621621
# Optimization #3: Fast Integer serialization
622622

623+
(`std::to_chars`)
623624

624625
```cpp
625626
while(number >= 10) {
@@ -637,7 +638,7 @@ Writing from the end
637638

638639
```cpp
639640
while(number >= 100) {
640-
memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2);
641+
memcpy(write_pointer - 1, &decimal_table[(pv % 100)*2], 2);
641642
write_pointer -= 2;
642643
pv /= 100;
643644
}
@@ -655,6 +656,11 @@ if(number >= 10) {
655656
- Useful to quickly compute the number of digits
656657

657658
```cpp
659+
template <typename number_type>
660+
int int_log2(number_type x) {
661+
return 63 - leading_zeroes(uint64_t(x) | 1);
662+
}
663+
658664
int fast_digit_count_64(uint64_t x) {
659665
static uint64_t table[] = {9,
660666
99,
@@ -670,17 +676,41 @@ int fast_digit_count_64(uint64_t x) {
670676
}
671677
```
672678
673-
674679
---
675680
676-
# Does fast integer processing matter?
677-
678-
Replace fast digit count by naive approach
681+
# Could use SIMD if we wanted to
679682
683+
**Don't try to understand:**
680684
```cpp
681-
std::to_string(value).length(); // Allocates string just to count!
685+
__m128i to_string_avx512ifma(uint64_t n) {
686+
uint64_t n_15_08 = n / 100000000;
687+
uint64_t n_07_00 = n % 100000000;
688+
__m512i bcstq_h = _mm512_set1_epi64(n_15_08);
689+
__m512i bcstq_l = _mm512_set1_epi64(n_07_00);
690+
__m512i zmmzero = _mm512_castsi128_si512(_mm_cvtsi64_si128(0x1A1A400));
691+
__m512i zmmTen = _mm512_set1_epi64(10);
692+
__m512i asciiZero = _mm512_set1_epi64('0');
693+
__m512i ifma_const = _mm512_setr_epi64(0x00000000002af31dc, ...);
694+
__m512i permb_const = _mm512_castsi128_si512(_mm_set_epi8(0x78, ...));
695+
__m512i lowbits_h = _mm512_madd52lo_epu64(zmmzero, bcstq_h, ifma_const);
696+
__m512i lowbits_l = _mm512_madd52lo_epu64(zmmzero, bcstq_l, ifma_const);
697+
__m512i highbits_h = _mm512_madd52hi_epu64(asciiZero, zmmTen, lowbits_h);
698+
__m512i highbits_l = _mm512_madd52hi_epu64(asciiZero, zmmTen, lowbits_l);
699+
__m512i perm = _mm512_permutex2var_epi8(highbits_h, permb_const, highbits_l);
700+
__m128i digits_15_0 = _mm512_castsi512_si128(perm);
701+
return digits_15_0;
702+
}
682703
```
683704

705+
---
706+
707+
# Does fast integer processing matter?
708+
709+
* Replace the fast digit count with a naive approach based on `std::to_string`
710+
```cpp
711+
std::to_string(value).length();
712+
```
713+
* Only 34% worse in one dataset.
684714

685715
---
686716

0 commit comments

Comments
 (0)