Skip to content

Commit 9f55fbc

Browse files
committed
Use different mulhi algo
1 parent d367cae commit 9f55fbc

File tree

1 file changed

+13
-15
lines changed

1 file changed

+13
-15
lines changed

src/day22.rs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,15 @@ const SEQUENCES: usize = 18 * 18 * 18 * 18;
4848

4949
#[inline(always)]
5050
unsafe fn vmod10(a: __m256i) -> __m256i {
51-
// Algo from LLVM
52-
let prod02 = _mm256_mul_epu32(a, _mm256_set1_epi32(3435973837u32 as i32));
53-
let prod13 = _mm256_mul_epu32(
54-
_mm256_shuffle_epi32::<0xf5>(a),
51+
let ab_hm = _mm256_mul_epu32(
52+
_mm256_srli_epi64::<32>(a),
5553
_mm256_set1_epi32(3435973837u32 as i32),
5654
);
57-
let d = _mm256_unpackhi_epi64(
58-
_mm256_unpacklo_epi32(prod02, prod13),
59-
_mm256_unpackhi_epi32(prod02, prod13),
60-
);
55+
let ab_hm = _mm256_and_si256(ab_hm, _mm256_set1_epi64x(0xFFFFFFFF00000000u64 as i64));
56+
let ab_lm =
57+
_mm256_srli_epi64::<32>(_mm256_mul_epu32(a, _mm256_set1_epi32(3435973837u32 as i32)));
58+
59+
let d = _mm256_or_si256(ab_lm, ab_hm);
6160

6261
let d = _mm256_srli_epi32::<3>(d);
6362
let c = _mm256_mullo_epi32(d, _mm256_set1_epi32(10));
@@ -67,15 +66,14 @@ unsafe fn vmod10(a: __m256i) -> __m256i {
6766
#[inline(always)]
6867
unsafe fn vmod104976(a: __m256i) -> __m256i {
6968
// Algo from LLVM
70-
let prod02 = _mm256_mul_epu32(a, _mm256_set1_epi32(2681326939u32 as i32));
71-
let prod13 = _mm256_mul_epu32(
72-
_mm256_shuffle_epi32::<0xf5>(a),
69+
let ab_hm = _mm256_mul_epu32(
70+
_mm256_srli_epi64::<32>(a),
7371
_mm256_set1_epi32(2681326939u32 as i32),
7472
);
75-
let d = _mm256_unpackhi_epi64(
76-
_mm256_unpacklo_epi32(prod02, prod13),
77-
_mm256_unpackhi_epi32(prod02, prod13),
78-
);
73+
let ab_hm = _mm256_and_si256(ab_hm, _mm256_set1_epi64x(0xFFFFFFFF00000000u64 as i64));
74+
let ab_lm =
75+
_mm256_srli_epi64::<32>(_mm256_mul_epu32(a, _mm256_set1_epi32(2681326939u32 as i32)));
76+
let d = _mm256_or_si256(ab_lm, ab_hm);
7977

8078
let d = _mm256_srli_epi32::<16>(d);
8179
let c = _mm256_mullo_epi32(d, _mm256_set1_epi32(104976));

0 commit comments

Comments
 (0)