@@ -60,24 +60,21 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
6060 . cast :: < __m256i > ( )
6161 . read_unaligned ( ) ;
6262 let s = _mm256_add_epi64 ( o, _mm256_set1_epi64x ( d as i64 ) ) ;
63-
63+ let s = _mm256_and_si256 ( s, _mm256_set1_epi8 ( 0x80u8 as i8 ) ) ;
64+ let s = _mm256_cmpeq_epi64 ( s, _mm256_set1_epi64x ( 0 ) ) ;
6465 let s = _mm256_movemask_epi8 ( s) as u32 ;
6566
66- sum += ( ( s & 0xFF_00_00_00 ) == 0 ) as u64 ;
67- sum += ( ( s & 0x00_FF_00_00 ) == 0 ) as u64 ;
68- sum += ( ( s & 0x00_00_FF_00 ) == 0 ) as u64 ;
69- sum += ( ( s & 0x00_00_00_FF ) == 0 ) as u64 ;
67+ sum += s. count_ones ( ) as u64 / 8 ;
7068 }
7169 if j > 0 {
7270 let o = other. as_ptr ( ) . cast :: < __m256i > ( ) . read_unaligned ( ) ;
7371 let s = _mm256_add_epi64 ( o, _mm256_set1_epi64x ( d as i64 ) ) ;
74-
72+ let s = _mm256_and_si256 ( s, _mm256_set1_epi8 ( 0x80u8 as i8 ) ) ;
73+ let s = _mm256_cmpeq_epi64 ( s, _mm256_set1_epi64x ( 0 ) ) ;
7574 let s = _mm256_movemask_epi8 ( s) as u32 ;
7675
77- let s = !( !s << ( 3 - j) * 8 ) ;
78- sum += ( ( s & 0x00_FF_00_00 ) == 0 ) as u64 ;
79- sum += ( ( s & 0x00_00_FF_00 ) == 0 ) as u64 ;
80- sum += ( ( s & 0x00_00_00_FF ) == 0 ) as u64 ;
76+ let s = s & ( 0xFF_FF_FF >> ( ( 3 - j) * 8 ) ) ;
77+ sum += s. count_ones ( ) as u64 / 8 ;
8178 }
8279
8380 let d = d + 0x7A7A7A7A7A ;
0 commit comments