Skip to content

Commit c6b9f32

Browse files
committed
Maybe this is better?
1 parent b65d232 commit c6b9f32

File tree

1 file changed

+18
-11
lines changed

1 file changed

+18
-11
lines changed

src/day21.rs

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -276,17 +276,24 @@ unsafe fn inner(s: &str, lut: &[u64; 1000]) -> u64 {
276276
0, 0, 100, 1,
277277
);
278278
let v = _mm256_madd_epi16(v, mul);
279-
let com_l = _mm256_i32gather_epi32::<8>(lut.as_ptr().cast(), v);
280-
let com_h = _mm256_i32gather_epi32::<8>(lut.as_ptr().cast::<i32>().offset(1), v);
281-
282-
let h1 = _mm256_unpacklo_epi32(com_l, com_h);
283-
let h2 = _mm256_unpackhi_epi32(com_l, com_h);
284-
let v = _mm256_add_epi64(h1, h2);
285-
let vs = _mm256_shuffle_epi32::<{ (1 << 6) | (0 << 4) | (3 << 2) | 2 }>(v);
286-
let v = _mm256_add_epi64(v, vs);
287-
288-
_mm256_extract_epi64::<0>(v) as u64 + _mm256_extract_epi64::<2>(v) as u64
289-
// let mut p = [0u64; 256 / 64];
279+
lut[_mm256_extract_epi32::<0>(v) as u32 as usize]
280+
+ lut[_mm256_extract_epi32::<1>(v) as u32 as usize]
281+
+ lut[_mm256_extract_epi32::<4>(v) as u32 as usize]
282+
+ lut[_mm256_extract_epi32::<5>(v) as u32 as usize]
283+
+ lut[_mm256_extract_epi32::<7>(v) as u32 as usize]
284+
// let com_l = _mm256_i32gather_epi32::<8>(lut.as_ptr().cast(), v);
285+
// let com_h = _mm256_i32gather_epi32::<8>(lut.as_ptr().cast::<i32>().offset(1), v);
286+
287+
// let h1 = _mm256_unpacklo_epi32(com_l, com_h);
288+
// let h2 = _mm256_unpackhi_epi32(com_l, com_h);
289+
290+
// let sum = 0;
291+
// let v = _mm256_add_epi64(h1, h2);
292+
// let vs = _mm256_shuffle_epi32::<{ (1 << 6) | (0 << 4) | (3 << 2) | 2 }>(v);
293+
// let v = _mm256_add_epi64(v, vs);
294+
295+
// _mm256_extract_epi64::<0>(v) as u64 + _mm256_extract_epi64::<2>(v) as u64
296+
// let mut p = [0u32; 256 / 32];
290297
// p.as_mut_ptr().cast::<__m256i>().write(v);
291298
// println!("{:?}", &p);
292299

0 commit comments

Comments (0)