Skip to content

Commit 3ca189f

Browse files
committed
Much faster SIMD implementation
1 parent abdfcfe commit 3ca189f

File tree

3 files changed

+134
-88
lines changed

3 files changed

+134
-88
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
315315
| 11 | [Radioisotope Thermoelectric Generators](https://adventofcode.com/2016/day/11) | [Source](src/year2016/day11.rs) | 785 |
316316
| 12 | [Leonardo's Monorail](https://adventofcode.com/2016/day/12) | [Source](src/year2016/day12.rs) | 1 |
317317
| 13 | [A Maze of Twisty Little Cubicles](https://adventofcode.com/2016/day/13) | [Source](src/year2016/day13.rs) | 4 |
318-
| 14 | [One-Time Pad](https://adventofcode.com/2016/day/14) | [Source](src/year2016/day14.rs) | 434000 |
318+
| 14 | [One-Time Pad](https://adventofcode.com/2016/day/14) | [Source](src/year2016/day14.rs) | 79000 |
319319
| 15 | [Timing is Everything](https://adventofcode.com/2016/day/15) | [Source](src/year2016/day15.rs) | 1 |
320320
| 16 | [Dragon Checksum](https://adventofcode.com/2016/day/16) | [Source](src/year2016/day16.rs) | 1 |
321321
| 17 | [Two Steps Forward](https://adventofcode.com/2016/day/17) | [Source](src/year2016/day17.rs) | 14254 |

src/year2016/day14.rs

Lines changed: 125 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use std::sync::Mutex;
99
use std::thread;
1010

1111
/// Atomics can be safely shared between threads.
12-
struct Shared {
12+
struct Shared<'a> {
13+
input: &'a str,
1314
done: AtomicBool,
1415
counter: AtomicI32,
1516
}
@@ -27,135 +28,174 @@ pub fn parse(input: &str) -> &str {
2728

2829
/// Hash each key once.
2930
pub fn part1(input: &str) -> i32 {
30-
let md5 = |n| {
31-
let (mut buffer, size) = format_string(input, n);
32-
hash(&mut buffer, size)
33-
};
34-
generate_pad(md5)
31+
generate_pad(input, false)
3532
}
3633

3734
/// Hash each key an additional 2016 times.
3835
pub fn part2(input: &str) -> i32 {
39-
let md5 = |n| {
40-
let (mut buffer, size) = format_string(input, n);
41-
let mut result = hash(&mut buffer, size);
42-
43-
for _ in 0..2016 {
44-
buffer[0..8].copy_from_slice(&to_ascii(result.0));
45-
buffer[8..16].copy_from_slice(&to_ascii(result.1));
46-
buffer[16..24].copy_from_slice(&to_ascii(result.2));
47-
buffer[24..32].copy_from_slice(&to_ascii(result.3));
48-
result = hash(&mut buffer, 32);
49-
}
50-
51-
result
52-
};
53-
generate_pad(md5)
54-
}
55-
56-
fn format_string(prefix: &str, n: i32) -> ([u8; 64], usize) {
57-
let string = format!("{prefix}{n}");
58-
let size = string.len();
59-
60-
let mut buffer = [0; 64];
61-
buffer[0..size].copy_from_slice(string.as_bytes());
62-
63-
(buffer, size)
36+
generate_pad(input, true)
6437
}
6538

6639
/// Find the first 64 keys that satisfy the rules.
67-
fn generate_pad(md5: impl Fn(i32) -> (u32, u32, u32, u32) + Copy + Sync) -> i32 {
68-
let shared = Shared { done: AtomicBool::new(false), counter: AtomicI32::new(0) };
40+
fn generate_pad(input: &str, part_two: bool) -> i32 {
41+
let shared = Shared { input, done: AtomicBool::new(false), counter: AtomicI32::new(0) };
6942
let exclusive =
7043
Exclusive { threes: BTreeMap::new(), fives: BTreeMap::new(), found: BTreeSet::new() };
7144
let mutex = Mutex::new(exclusive);
7245

7346
// Use as many cores as possible to parallelize the search.
7447
thread::scope(|scope| {
7548
for _ in 0..thread::available_parallelism().unwrap().get() {
76-
scope.spawn(|| check_keys(&shared, &mutex, md5));
49+
scope.spawn(|| worker(&shared, &mutex, part_two));
7750
}
7851
});
7952

8053
let exclusive = mutex.into_inner().unwrap();
8154
*exclusive.found.iter().nth(63).unwrap()
8255
}
8356

84-
fn check_keys(
85-
shared: &Shared,
86-
mutex: &Mutex<Exclusive>,
87-
md5: impl Fn(i32) -> (u32, u32, u32, u32),
88-
) {
57+
#[cfg(not(feature = "simd"))]
58+
fn worker(shared: &Shared<'_>, mutex: &Mutex<Exclusive>, part_two: bool) {
8959
while !shared.done.load(Ordering::Relaxed) {
9060
// Get the next key to check.
9161
let n = shared.counter.fetch_add(1, Ordering::Relaxed);
62+
9263
// Calculate the hash.
93-
let (a, b, c, d) = md5(n);
94-
95-
// Check for sequences of 3 or 5 consecutive matching digits.
96-
let mut prev = u32::MAX;
97-
let mut same = 1;
98-
let mut three = 0;
99-
let mut five = 0;
100-
101-
for mut word in [d, c, b, a] {
102-
for _ in 0..8 {
103-
let next = word & 0xf;
104-
105-
if next == prev {
106-
same += 1;
107-
} else {
108-
same = 1;
109-
}
64+
let (mut buffer, size) = format_string(shared.input, n);
65+
let mut result = hash(&mut buffer, size);
11066

111-
if same == 3 {
112-
three = 1 << next;
113-
}
114-
if same == 5 {
115-
five |= 1 << next;
67+
if part_two {
68+
for _ in 0..2016 {
69+
buffer[0..8].copy_from_slice(&to_ascii(result.0));
70+
buffer[8..16].copy_from_slice(&to_ascii(result.1));
71+
buffer[16..24].copy_from_slice(&to_ascii(result.2));
72+
buffer[24..32].copy_from_slice(&to_ascii(result.3));
73+
result = hash(&mut buffer, 32);
74+
}
75+
}
76+
77+
check(shared, mutex, n, result);
78+
}
79+
}
80+
81+
/// Use SIMD to compute hashes in parallel in blocks of 32.
82+
#[cfg(feature = "simd")]
83+
#[allow(clippy::needless_range_loop)]
84+
fn worker(shared: &Shared<'_>, mutex: &Mutex<Exclusive>, part_two: bool) {
85+
let mut result = ([0; 32], [0; 32], [0; 32], [0; 32]);
86+
let mut buffers = [[0; 64]; 32];
87+
88+
while !shared.done.load(Ordering::Relaxed) {
89+
// Get the next key to check.
90+
let start = shared.counter.fetch_add(32, Ordering::Relaxed);
91+
92+
// Calculate the hash.
93+
for i in 0..32 {
94+
let (mut buffer, size) = format_string(shared.input, start + i as i32);
95+
let (a, b, c, d) = hash(&mut buffer, size);
96+
97+
result.0[i] = a;
98+
result.1[i] = b;
99+
result.2[i] = c;
100+
result.3[i] = d;
101+
}
102+
103+
if part_two {
104+
for _ in 0..2016 {
105+
for i in 0..32 {
106+
buffers[i][0..8].copy_from_slice(&to_ascii(result.0[i]));
107+
buffers[i][8..16].copy_from_slice(&to_ascii(result.1[i]));
108+
buffers[i][16..24].copy_from_slice(&to_ascii(result.2[i]));
109+
buffers[i][24..32].copy_from_slice(&to_ascii(result.3[i]));
116110
}
111+
result = simd::hash::<32>(&mut buffers, 32);
112+
}
113+
}
114+
115+
for i in 0..32 {
116+
let hash = (result.0[i], result.1[i], result.2[i], result.3[i]);
117+
check(shared, mutex, start + i as i32, hash);
118+
}
119+
}
120+
}
117121

118-
word >>= 4;
119-
prev = next;
122+
/// Check for sequences of 3 or 5 consecutive matching digits.
123+
fn check(shared: &Shared<'_>, mutex: &Mutex<Exclusive>, n: i32, hash: (u32, u32, u32, u32)) {
124+
let (a, b, c, d) = hash;
125+
126+
let mut prev = u32::MAX;
127+
let mut same = 1;
128+
let mut three = 0;
129+
let mut five = 0;
130+
131+
for mut word in [d, c, b, a] {
132+
for _ in 0..8 {
133+
let next = word & 0xf;
134+
135+
if next == prev {
136+
same += 1;
137+
} else {
138+
same = 1;
120139
}
140+
141+
if same == 3 {
142+
three = 1 << next;
143+
}
144+
if same == 5 {
145+
five |= 1 << next;
146+
}
147+
148+
word >>= 4;
149+
prev = next;
121150
}
151+
}
122152

123-
if three != 0 || five != 0 {
124-
let mut exclusive = mutex.lock().unwrap();
125-
let mut candidates = Vec::new();
153+
if three != 0 || five != 0 {
154+
let mut exclusive = mutex.lock().unwrap();
155+
let mut candidates = Vec::new();
126156

127-
// Compare against all 5 digit sequences.
128-
if three != 0 {
129-
exclusive.threes.insert(n, three);
157+
// Compare against all 5 digit sequences.
158+
if three != 0 {
159+
exclusive.threes.insert(n, three);
130160

131-
for (&index, &mask) in exclusive.fives.range(n + 1..n + 1000) {
132-
if three & mask != 0 {
133-
candidates.push(index);
134-
}
161+
for (_, mask) in exclusive.fives.range(n + 1..n + 1001) {
162+
if three & mask != 0 {
163+
candidates.push(n);
135164
}
136165
}
166+
}
137167

138-
// Compare against all 3 digit sequences.
139-
if five != 0 {
140-
exclusive.fives.insert(n, five);
168+
// Compare against all 3 digit sequences.
169+
if five != 0 {
170+
exclusive.fives.insert(n, five);
141171

142-
for (&index, &mask) in exclusive.threes.range(n - 1000..n - 1) {
143-
if five & mask != 0 {
144-
candidates.push(index);
145-
}
172+
for (&index, &mask) in exclusive.threes.range(n - 1000..n) {
173+
if five & mask != 0 {
174+
candidates.push(index);
146175
}
147176
}
177+
}
148178

149-
// Add any matching keys found, finishing once we have at least 64 keys.
150-
exclusive.found.extend(candidates);
179+
// Add any matching keys found, finishing once we have at least 64 keys.
180+
exclusive.found.extend(candidates);
151181

152-
if exclusive.found.len() >= 64 {
153-
shared.done.store(true, Ordering::Relaxed);
154-
}
182+
if exclusive.found.len() >= 64 {
183+
shared.done.store(true, Ordering::Relaxed);
155184
}
156185
}
157186
}
158187

188+
/// Write the salt and integer index as ASCII characters.
189+
fn format_string(prefix: &str, n: i32) -> ([u8; 64], usize) {
190+
let string = format!("{prefix}{n}");
191+
let size = string.len();
192+
193+
let mut buffer = [0; 64];
194+
buffer[0..size].copy_from_slice(string.as_bytes());
195+
196+
(buffer, size)
197+
}
198+
159199
/// Quickly convert a `u32` to an array of 8 ASCII values.
160200
fn to_ascii(n: u32) -> [u8; 8] {
161201
// Spread each nibble into its own byte, for example `1234abcd` becomes `010203040a0b0c0d`.

tests/year2016/day14_test.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1+
use aoc::year2016::day14::*;
2+
3+
const EXAMPLE: &str = "abc";
4+
15
#[test]
26
fn part1_test() {
3-
// No example data
7+
let input = parse(EXAMPLE);
8+
assert_eq!(part1(input), 22728);
49
}
510

611
#[test]
712
fn part2_test() {
8-
// No example data
13+
let input = parse(EXAMPLE);
14+
assert_eq!(part2(input), 22551);
915
}

0 commit comments

Comments
 (0)