Skip to content

Commit 9af2c5e

Browse files
committed
Year 2018 Day 14
1 parent 2cb74d1 commit 9af2c5e

File tree

7 files changed

+350
-0
lines changed

7 files changed

+350
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
250250
| 11 | [Chronal Charge](https://adventofcode.com/2018/day/11) | [Source](src/year2018/day11.rs) | 1552 |
251251
| 12 | [Subterranean Sustainability](https://adventofcode.com/2018/day/12) | [Source](src/year2018/day12.rs) | 75 |
252252
| 13 | [Mine Cart Madness](https://adventofcode.com/2018/day/13) | [Source](src/year2018/day13.rs) | 391 |
253+
| 14 | [Chocolate Charts](https://adventofcode.com/2018/day/14) | [Source](src/year2018/day14.rs) | 24000 |
253254

254255
## 2017
255256

benches/benchmark.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ mod year2018 {
142142
benchmark!(year2018, day11);
143143
benchmark!(year2018, day12);
144144
benchmark!(year2018, day13);
145+
benchmark!(year2018, day14);
145146
}
146147

147148
mod year2019 {

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ pub mod year2018 {
125125
pub mod day11;
126126
pub mod day12;
127127
pub mod day13;
128+
pub mod day14;
128129
}
129130

130131
/// # Rescue Santa from deep space with a solar system voyage.

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ fn year2018() -> Vec<Solution> {
192192
solution!(year2018, day11),
193193
solution!(year2018, day12),
194194
solution!(year2018, day13),
195+
solution!(year2018, day14),
195196
]
196197
}
197198

src/year2018/day14.rs

Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
//! # Chocolate Charts
2+
//!
3+
//! This solution is heavily inspired by [Askalski's](https://www.reddit.com/user/askalski/)
4+
//! excellent post [Breaking the 1 billion recipes per second barrier](https://www.reddit.com/r/adventofcode/comments/a6wpwa/2018_day_14_breaking_the_1_billion_recipes_per/)
5+
//!
6+
//! The key insight is that after 23 recipes the elves converge into using the *same subset* of
7+
//! recipes. This subset can be stored compactly allowing efficient vector processing.
8+
//!
9+
//! Tricks used to speed things up:
10+
//! * Separate writer and reader threads to generate recipes and check them in parallel.
11+
//! * Vector processing of recipes using techniques similar to SIMD.
12+
use crate::util::parse::*;
13+
use std::sync::atomic::{AtomicBool, Ordering};
14+
use std::sync::mpsc::{channel, Receiver, Sender};
15+
use std::thread;
16+
17+
type Input = (String, usize);
18+
19+
/// Pre-calculate the first 23 recipes.
20+
const PREFIX: [u8; 23] = [3, 7, 1, 0, 1, 0, 1, 2, 4, 5, 1, 5, 8, 9, 1, 6, 7, 7, 9, 2, 5, 1, 0];
21+
22+
pub fn parse(input: &str) -> Input {
23+
// Send batches of recipes from the writer to the reader for checking.
24+
let (tx, rx) = channel();
25+
// Thread safe flag to let writer know when to stop.
26+
let done = AtomicBool::new(false);
27+
// Store recipes in fixed size vec prefilled with ones. Part two result is around 20 million
28+
// so size should be sufficient for most inputs.
29+
let mut recipes = vec![1; 25_000_000];
30+
31+
thread::scope(|scope| {
32+
// Start writer thread to produce new recipes.
33+
scope.spawn(|| writer(tx, &done, recipes.as_mut_slice()));
34+
// Reader thread checks recipes for the answers, returning when both parts are found.
35+
scope.spawn(|| reader(rx, &done, input)).join().unwrap()
36+
})
37+
}
38+
39+
pub fn part1(input: &Input) -> &str {
40+
&input.0
41+
}
42+
43+
pub fn part2(input: &Input) -> usize {
44+
input.1
45+
}
46+
47+
/// Receives batches of recipes from the writer thread, then scans them byte by byte searching
48+
/// for the part two pattern. For simplicity the pattern is always assumed to by six digits.
49+
fn reader(rx: Receiver<&[u8]>, done: &AtomicBool, input: &str) -> (String, usize) {
50+
let part_one_target = input.unsigned::<usize>() + 10;
51+
let part_two_target = u32::from_str_radix(input.trim(), 16).unwrap();
52+
53+
let mut part_one_result = None;
54+
let mut part_two_result = None;
55+
56+
let mut history = Vec::new();
57+
let mut total = 0;
58+
let mut pattern = 0;
59+
60+
for slice in rx {
61+
history.push(slice);
62+
total += slice.len();
63+
64+
// The recipes are broken up into batches. Even though these batches originally come
65+
// from the same contiguous slice, this thread has no way to know that or reassemble
66+
// the original. The result could potentially be split over two or more slices.
67+
if part_one_result.is_none() && total >= part_one_target {
68+
let mut index = 0;
69+
let mut offset = part_one_target - 10;
70+
let mut result = String::new();
71+
72+
for _ in 0..10 {
73+
// If we go past the end of a slice then check the next one.
74+
while offset >= history[index].len() {
75+
offset -= history[index].len();
76+
index += 1;
77+
}
78+
79+
// Push each digit into a string as there could be leading zeroes.
80+
let digit = history[index][offset];
81+
result.push((digit + b'0') as char);
82+
offset += 1;
83+
}
84+
85+
part_one_result = Some(result);
86+
}
87+
88+
// Simple brute force pattern matching. Slices are received in order so the pattern will
89+
// handle cases when the target is split between two slices.
90+
if part_two_result.is_none() {
91+
for (i, n) in slice.iter().copied().enumerate() {
92+
pattern = ((pattern << 4) | (n as u32)) & 0xffffff;
93+
94+
if pattern == part_two_target {
95+
part_two_result = Some(total - slice.len() + i - 5);
96+
break;
97+
}
98+
}
99+
}
100+
101+
// Signal the writer thread to finish once both results are found.
102+
if part_one_result.is_some() && part_two_result.is_some() {
103+
done.store(true, Ordering::Relaxed);
104+
break;
105+
}
106+
}
107+
108+
(part_one_result.unwrap(), part_two_result.unwrap())
109+
}
110+
111+
/// Generates recipes then sends them to the reader thread for checking in batches.
112+
/// Processing is broken into alternating "cold" and "hot" loops. An outer enclosing loop checks
113+
/// periodically for the done signal from the reader thread.
114+
///
115+
/// The "cold" loop processes recipes serially one by one but can handle input corner cases.
116+
/// It's used when either:
117+
/// * One or both elves are within the first 23 recipes.
118+
/// * One or both elves are within the last 16 recipes.
119+
///
120+
/// The "hot" loop processes recipes efficiently in chunks of 16. The vast majority of recipes
121+
/// are calculated in this loop. As much as possible is parallelized using techniques similar to
122+
/// SIMD but using regular instructions instead of SIMD instrinsics or Rust's portable SIMD API.
123+
///
124+
/// Interestingly on an Apple M2 Max this "poor man's SIMD" has the same performance as using
125+
/// the portable SIMD API. This is probably due to the fact that the serial loops that write new
126+
/// recipes take the majority of the time.
127+
fn writer<'a>(tx: Sender<&'a [u8]>, done: &AtomicBool, mut recipes: &'a mut [u8]) {
128+
// The first 23 recipes have already been generated
129+
// so the elves start at position 0 and 8 respectively.
130+
let mut elf1 = 0;
131+
let mut index1 = 0;
132+
133+
let mut elf2 = 8;
134+
let mut index2 = 0;
135+
136+
let mut base = 0;
137+
let mut size = 23;
138+
let mut needed = 23;
139+
140+
// Store the smaller subset of recipes used by the elves.
141+
let mut write = 0;
142+
let mut skip: Vec<u8> = vec![0; 5_000_000];
143+
144+
while !done.load(Ordering::Relaxed) {
145+
// Cold loop to handle start and end transitions.
146+
while elf1 < 23 || elf2 < 23 || write - index1.max(index2) <= 16 {
147+
// After the first 23 recipes both elves converge on the same set of ingredients.
148+
let recipe1 = if elf1 < 23 {
149+
PREFIX[elf1]
150+
} else {
151+
index1 += 1;
152+
skip[index1 - 1]
153+
};
154+
155+
let recipe2 = if elf2 < 23 {
156+
PREFIX[elf2]
157+
} else {
158+
index2 += 1;
159+
skip[index2 - 1]
160+
};
161+
162+
// Add next recipe.
163+
let next = recipe1 + recipe2;
164+
if next < 10 {
165+
recipes[size - base] = next;
166+
size += 1;
167+
} else {
168+
recipes[size - base + 1] = next - 10;
169+
size += 2;
170+
}
171+
172+
if needed < size {
173+
let digit = recipes[needed - base];
174+
needed += 1 + digit as usize;
175+
176+
skip[write] = digit;
177+
write += 1;
178+
}
179+
180+
// Wrap around to start if necessary.
181+
elf1 += 1 + recipe1 as usize;
182+
if elf1 >= size {
183+
elf1 -= size;
184+
index1 = 0;
185+
}
186+
187+
elf2 += 1 + recipe2 as usize;
188+
if elf2 >= size {
189+
elf2 -= size;
190+
index2 = 0;
191+
}
192+
}
193+
194+
// Hot loop to handle the majority of recipes in the middle. Process at most 10,000 recipes
195+
// at a time in order to produce batches between 160,000 and 320,000 bytes in size.
196+
// This size is roughly tuned in order to maximize reader thread throughput.
197+
let batch_size = 10_000.min((write - index1.max(index2) - 1) / 16);
198+
199+
for _ in 0..batch_size {
200+
// The skip recipes can be processed contiguously.
201+
let first = from_be_bytes(&skip, index1);
202+
let second = from_be_bytes(&skip, index2);
203+
let third = from_be_bytes(&skip, index1 + 8);
204+
let fourth = from_be_bytes(&skip, index2 + 8);
205+
206+
// Each elf will skip forward between 16 and 32 recipes.
207+
elf1 += 16 + lsb(prefix_sum(first)) + lsb(prefix_sum(third));
208+
elf2 += 16 + lsb(prefix_sum(second)) + lsb(prefix_sum(fourth));
209+
index1 += 16;
210+
index2 += 16;
211+
212+
// Process the digits in parallel using techniques similar to SIMD.
213+
let (digits1, indices1, extra1) = unpack(first, second);
214+
let (digits2, indices2, extra2) = unpack(third, fourth);
215+
216+
// Scatter each digit into the correct location, leaving "holes" where ones should go.
217+
// This is handled correctly by prefilling `recipes`` with ones when initializing.
218+
for shift in (0..64).step_by(8) {
219+
let digit = lsb(digits1 >> shift);
220+
let index = lsb(indices1 >> shift);
221+
recipes[size - base + index] = digit as u8;
222+
223+
let digit = lsb(digits2 >> shift);
224+
let index = lsb(indices2 >> shift);
225+
recipes[size - base + index + extra1] = digit as u8;
226+
}
227+
228+
size += extra1 + extra2;
229+
230+
// Write the recipes that will actually be used in subsequent loops to a smaller
231+
// contiguous vec.
232+
while needed < size {
233+
let digit = recipes[needed - base];
234+
needed += 1 + digit as usize;
235+
236+
skip[write] = digit;
237+
write += 1;
238+
}
239+
}
240+
241+
// Split the mutable `recipes` slice into two parts. This allows the reader thread to
242+
// access the head in parallel while the reader thread continues to write to the tail,
243+
// ensuring unique ownership of each part of memory to prevent any concurrency issues.
244+
let (head, tail) = recipes.split_at_mut(size - base);
245+
let _unused = tx.send(head);
246+
recipes = tail;
247+
base = size;
248+
}
249+
250+
// Drop the sender to make the receiver hang up.
251+
drop(tx);
252+
}
253+
254+
/// Reads the 8 bytes of `slice` starting at `index` as a single
/// [big endian](https://en.wikipedia.org/wiki/Endianness) `usize`, so that the byte at `index`
/// becomes the most significant byte of the result.
///
/// Panics if fewer than 8 bytes are available from `index` onwards.
#[inline]
fn from_be_bytes(slice: &[u8], index: usize) -> usize {
    let window = &slice[index..index + 8];
    let bytes: [u8; 8] = window.try_into().unwrap();
    usize::from_be_bytes(bytes)
}
259+
260+
/// Extracts the lowest 8 bits of `u`, discarding every other byte.
#[inline]
fn lsb(u: usize) -> usize {
    u % 0x100
}
265+
266+
/// Calculates a running total across the 8 bytes of a `usize`, starting from the most
/// significant byte. With bytes `a..h` ordered from most to least significant and `Σx..y`
/// meaning the inclusive sum from `x` to `y`, three shift-and-add steps are enough:
///
/// ```none
///     input            | a | b     | c     | d     | e     | f     | g     | h     |
///     add self >> 8    | a | Σa..b | Σb..c | Σc..d | Σd..e | Σe..f | Σf..g | Σg..h |
///     add self >> 16   | a | Σa..b | Σa..c | Σa..d | Σb..e | Σc..f | Σd..g | Σe..h |
///     add self >> 32   | a | Σa..b | Σa..c | Σa..d | Σa..e | Σa..f | Σa..g | Σa..h |
/// ```
///
/// Each step doubles the number of neighbouring bytes folded into every position.
#[inline]
fn prefix_sum(u: usize) -> usize {
    let pairs = u + (u >> 8);
    let quads = pairs + (pairs >> 16);
    quads + (quads >> 32)
}
283+
284+
/// Takes two groups of 8 digits each packed into a `usize` as input, then returns the output
285+
/// digits and their respective locations. Ones from sums greater than ten are implicit and not
286+
/// included since recipes has already been pre-filled with ones.
287+
#[inline]
288+
fn unpack(first: usize, second: usize) -> (usize, usize, usize) {
289+
const ONES: usize = 0x0101010101010101;
290+
const SIXES: usize = 0x0606060606060606;
291+
const INDICES: usize = 0x0001020304050607;
292+
293+
// Example values, showing each byte in a columm:
294+
//
295+
// first | 04 | 01 | 09 | 08 | 00 | 03 | 05 | 07 |
296+
// second | 03 | 00 | 02 | 04 | 09 | 06 | 05 | 01 |
297+
// sum | 07 | 01 | 0b | 0c | 09 | 09 | 0a | 08 |
298+
let sum = first + second;
299+
300+
// Add 6 to each byte so that sums greater than or equal to ten become greater than or equal
301+
// to 16, setting the first bit in the high nibble of each byte.
302+
//
303+
// sum | 07 | 01 | 0b | 0c | 09 | 09 | 0a | 08 |
304+
// SIXES | 06 | 06 | 06 | 06 | 06 | 06 | 06 | 06 |
305+
// total | 0d | 07 | 11 | 12 | 0f | 0f | 10 | 0e |
306+
// tens | 00 | 00 | 01 | 01 | 00 | 00 | 01 | 00 |
307+
let tens = ((sum + SIXES) >> 4) & ONES;
308+
309+
// Multiply by 10 to "spread" a 10 into each byte that has a total greater than 10.
310+
//
311+
// tens | 00 | 00 | 01 | 01 | 00 | 00 | 01 | 00 |
312+
// tens * 10 | 00 | 00 | 0a | 0a | 00 | 00 | 0a | 00 |
313+
// digits | 07 | 01 | 01 | 02 | 09 | 09 | 00 | 08 |
314+
let digits = sum - 10 * tens;
315+
316+
// Columns greater than 10 will takes 2 bytes when written to recipes. Each index is
317+
// offset by the number of 10s before it. Adding the normal increase indices gives the
318+
// final location of each byte.
319+
//
320+
// tens | 00 | 00 | 01 | 01 | 00 | 00 | 01 | 00 |
321+
// prefix sum | 00 | 00 | 01 | 02 | 02 | 02 | 03 | 03 |
322+
// INDICES | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 |
323+
// indices | 00 | 02 | 03 | 05 | 06 | 07 | 09 | 0a |
324+
let indices = prefix_sum(tens) + INDICES;
325+
326+
// The total number of bytes that need to be written is one plus the last index.
327+
let extra = 1 + lsb(indices);
328+
329+
(digits, indices, extra)
330+
}

tests/test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ mod year2018 {
126126
mod day11_test;
127127
mod day12_test;
128128
mod day13_test;
129+
mod day14_test;
129130
}
130131

131132
mod year2019 {

tests/year2018/day14_test.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
use aoc::year2018::day14::*;
2+
3+
const EXAMPLE: &str = "594142";
4+
5+
/// Checks the ten scores reported for the example input.
#[test]
fn part1_test() {
    let parsed = parse(EXAMPLE);
    let scores = part1(&parsed);
    assert_eq!(scores, "1291321443");
}
10+
11+
/// Checks how many recipes precede the example pattern.
#[test]
fn part2_test() {
    let parsed = parse(EXAMPLE);
    let preceding = part2(&parsed);
    assert_eq!(preceding, 2018);
}

0 commit comments

Comments
 (0)