Skip to content

Commit a0fdad8

Browse files
committed
2018 day 14
Benchmarks compared to /u/askalski's implementation using AVX2 intrinsics on my 9950x: > clang++ -O3 -march=native askalski-2018-14.cpp -o askalski-2018-14 > cargo build --release && cp target/release/aoc aoc-baseline > RUSTFLAGS='-C target_cpu=native' cargo build --release && cp target/release/aoc aoc-native > hyperfine -N --input inputs/year2018/day14.txt './aoc-baseline --stdin 2018 14' './aoc-native --stdin 2018 14' './askalski-2018-14' Benchmark 1: ./aoc-baseline --stdin 2018 14 Time (mean ± σ): 16.8 ms ± 0.5 ms [User: 11.8 ms, System: 4.9 ms] Range (min … max): 16.1 ms … 18.8 ms 179 runs Benchmark 2: ./aoc-native --stdin 2018 14 Time (mean ± σ): 13.5 ms ± 0.4 ms [User: 9.1 ms, System: 4.2 ms] Range (min … max): 12.8 ms … 15.1 ms 225 runs Benchmark 3: ./askalski-2018-14 Time (mean ± σ): 13.8 ms ± 0.4 ms [User: 9.4 ms, System: 4.3 ms] Range (min … max): 13.1 ms … 16.1 ms 220 runs Summary ./aoc-native --stdin 2018 14 ran 1.02 ± 0.05 times faster than ./askalski-2018-14 1.24 ± 0.05 times faster than ./aoc-baseline --stdin 2018 14
1 parent a04abb6 commit a0fdad8

File tree

4 files changed

+329
-1
lines changed

4 files changed

+329
-1
lines changed

crates/utils/src/parser/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ mod then;
1313
pub use base::*;
1414
pub use error::ParseError;
1515
pub use iterator::{ParserIterator, ParserMatchesIterator};
16-
pub use number::{i8, i16, i32, i64, i128, number_range, u8, u16, u32, u64, u128};
16+
pub use number::{digit, i8, i16, i32, i64, i128, number_range, u8, u16, u32, u64, u128};
1717
pub use one_of::one_of;
1818
pub use simple::{byte, byte_lut, byte_range, constant, eof, eol, noop, take_while, take_while1};
1919

crates/utils/src/parser/number.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,35 @@ pub fn number_range<I: Integer + Parseable>(range: RangeInclusive<I>) -> NumberR
147147
assert!(min <= max);
148148
NumberRange { min, max }
149149
}
150+
151+
#[derive(Copy, Clone)]
152+
pub struct Digit {}
153+
154+
impl<'i> Parser<'i> for Digit {
155+
type Output = u8;
156+
type Then<T: Parser<'i>> = Then2<Self, T>;
157+
158+
fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
159+
if let Some(d @ b'0'..=b'9') = input.first() {
160+
Ok((d - b'0', &input[1..]))
161+
} else {
162+
Err((ParseError::Expected("digit"), input))
163+
}
164+
}
165+
}
166+
167+
/// Parser for single digits.
168+
///
169+
/// # Examples
170+
/// ```
171+
/// # use utils::parser::{self, Parser};
172+
/// assert_eq!(
173+
/// parser::digit().parse(b"12345"),
174+
/// Ok((1u8, &b"2345"[..]))
175+
/// );
176+
/// ```
177+
#[inline]
178+
#[must_use]
179+
pub fn digit() -> Digit {
180+
Digit {}
181+
}

crates/year2018/src/day14.rs

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
use utils::prelude::*;
2+
3+
/// Searching for a pattern in an infinite recipe list.
4+
///
5+
/// This implementation is based on
6+
/// [/u/askalski's post "Breaking the 1 billion recipes per second barrier"](https://www.reddit.com/r/adventofcode/comments/a6wpwa/2018_day_14_breaking_the_1_billion_recipes_per/)
7+
/// and accompanying [gist](https://gist.github.com/Voltara/5069980afbf6cf0762fcbb09948e5649),
8+
/// adapted to safe Rust without intrinsics or explicit SIMD while achieving similar performance.
9+
///
10+
/// The key optimization is that all the elves converge on the same subset of recipes after the
11+
/// first 23 recipes, allowing them to be processed in bulk using SIMD.
12+
#[derive(Clone, Debug)]
13+
pub struct Day14 {
14+
part1: [u8; 10],
15+
part2: usize,
16+
}
17+
18+
impl Day14 {
19+
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
20+
let pattern = parser::digit()
21+
.repeat_n(parser::noop())
22+
.parse_complete(input)?;
23+
let index = input.parse().unwrap();
24+
25+
let mut searcher = Searcher::new(pattern);
26+
let part2 = searcher.run();
27+
28+
// Use a string for part 1 to preserve leading zeros
29+
let mut part1 = [0u8; 10];
30+
part1.copy_from_slice(&searcher.recipes[index..index + 10]);
31+
part1.iter_mut().for_each(|x| *x += b'0');
32+
33+
Ok(Self { part1, part2 })
34+
}
35+
36+
#[must_use]
37+
pub fn part1(&self) -> &str {
38+
str::from_utf8(&self.part1).unwrap()
39+
}
40+
41+
#[must_use]
42+
pub fn part2(&self) -> usize {
43+
self.part2
44+
}
45+
}
46+
47+
struct Searcher {
48+
pattern: [u8; 6],
49+
recipes: Vec<u8>,
50+
recipes_len: usize,
51+
packed: Vec<u8>,
52+
packed_len: usize,
53+
packed_next_idx: usize,
54+
packed_last_idx: usize,
55+
elves: [usize; 2],
56+
initial: [i32; 2],
57+
searched: usize,
58+
}
59+
60+
// Pre-calculated sequences of recipes processed before index 23 where they all converge
61+
const INITIAL_SEQUENCES: [u32; 9] = [
62+
0xfff94113, 0xffff0657, 0xfff94111, 0xfff94110, 0xffff9411, 0xffff9410, 0xfffff941, 0xffff1812,
63+
0xffffff94,
64+
];
65+
66+
// The first 23 recipes
67+
const RECIPES: [u8; 23] = [
68+
3, 7, 1, 0, 1, 0, 1, 2, 4, 5, 1, 5, 8, 9, 1, 6, 7, 7, 9, 2, 5, 1, 0,
69+
];
70+
71+
const RECIPES_LEN: usize = 22_000_000;
72+
const PACKED_LEN: usize = RECIPES_LEN / 4;
73+
const SEARCH_INTERVAL: usize = 16384;
74+
75+
impl Searcher {
76+
fn new(pattern: [u8; 6]) -> Self {
77+
// Prefill the recipes array with ones so 2-digit recipes can be written with a single
78+
// write by skipping over the leading 1.
79+
let mut recipes = vec![1u8; RECIPES_LEN];
80+
recipes[..RECIPES.len()].copy_from_slice(&RECIPES);
81+
82+
Self {
83+
pattern,
84+
recipes,
85+
recipes_len: RECIPES.len(),
86+
packed: vec![0u8; PACKED_LEN],
87+
packed_len: 0,
88+
packed_next_idx: RECIPES.len(),
89+
packed_last_idx: RECIPES.len(),
90+
elves: [0, 0],
91+
initial: [INITIAL_SEQUENCES[0] as i32, INITIAL_SEQUENCES[8] as i32],
92+
searched: 0,
93+
}
94+
}
95+
96+
#[inline]
97+
fn append_recipe(&mut self, mut recipe: u8) {
98+
// Tens digit
99+
if recipe >= 10 {
100+
recipe -= 10;
101+
if self.recipes_len == self.packed_next_idx {
102+
self.packed[self.packed_len] = 1;
103+
self.packed_len += 1;
104+
self.packed_last_idx = self.packed_next_idx;
105+
self.packed_next_idx += 2;
106+
}
107+
// No write to self.recipes as the array is prefilled with 1
108+
self.recipes_len += 1;
109+
}
110+
111+
// Ones digit
112+
if self.recipes_len == self.packed_next_idx {
113+
self.packed[self.packed_len] = recipe;
114+
self.packed_len += 1;
115+
self.packed_last_idx = self.packed_next_idx;
116+
self.packed_next_idx += recipe as usize + 1;
117+
}
118+
self.recipes[self.recipes_len] = recipe;
119+
self.recipes_len += 1;
120+
121+
// Handle wrapping around to the start
122+
for i in 0..2 {
123+
if self.elves[i] == self.packed_len {
124+
let initial_index = self.packed_last_idx
125+
+ (self.packed[self.packed_len - 1] as usize + 1)
126+
- self.recipes_len;
127+
self.initial[i] = INITIAL_SEQUENCES[initial_index] as i32;
128+
self.elves[i] = 0;
129+
}
130+
}
131+
}
132+
133+
#[inline]
134+
fn run(&mut self) -> usize {
135+
loop {
136+
// At least one elf is in the first 23 recipes, before the pattern coverages
137+
while self.initial[0] != -1 || self.initial[1] != -1 {
138+
let mut recipe = 0;
139+
for i in 0..2 {
140+
if self.initial[i] != -1 {
141+
recipe += (self.initial[i] & 0xF) as u8;
142+
self.initial[i] >>= 4;
143+
} else {
144+
recipe += self.packed[self.elves[i]];
145+
self.elves[i] += 1;
146+
}
147+
}
148+
self.append_recipe(recipe);
149+
}
150+
151+
// Both elves are after recipe 23, so recipes can be processed in bulk
152+
loop {
153+
let mut iterations = (self.packed_len - self.elves[0].max(self.elves[1])) / 32;
154+
if iterations == 0 {
155+
break;
156+
}
157+
158+
while iterations > 0 {
159+
if iterations > 16 {
160+
self.bulk_mix::<512>();
161+
iterations -= 16;
162+
} else {
163+
self.bulk_mix::<32>();
164+
iterations -= 1;
165+
}
166+
167+
// Periodically search for the pattern
168+
if self.searched + SEARCH_INTERVAL < self.recipes_len
169+
&& let Some(index) = self.search()
170+
{
171+
return index;
172+
}
173+
}
174+
175+
// Pack the new recipes
176+
while self.packed_next_idx < self.recipes_len {
177+
self.packed_last_idx = self.packed_next_idx;
178+
self.packed[self.packed_len] = self.recipes[self.packed_next_idx];
179+
self.packed_len += 1;
180+
self.packed_next_idx += self.recipes[self.packed_next_idx] as usize + 1;
181+
}
182+
}
183+
184+
// Handle the remaining recipes before wrapping around to the start
185+
while self.initial[0] == -1 && self.initial[1] == -1 {
186+
let sum = self.packed[self.elves[0]] + self.packed[self.elves[1]];
187+
self.elves[0] += 1;
188+
self.elves[1] += 1;
189+
self.append_recipe(sum);
190+
}
191+
}
192+
}
193+
194+
#[inline]
195+
fn bulk_mix<const N: usize>(&mut self) {
196+
const { assert!(N.is_multiple_of(32)) }
197+
198+
let v0 = &self.packed[self.elves[0]..self.elves[0] + N];
199+
let v1 = &self.packed[self.elves[1]..self.elves[1] + N];
200+
self.elves[0] += N;
201+
self.elves[1] += N;
202+
203+
// Calculate the ones digit and carry for each recipe sum.
204+
// This loop should be vectorized by the compiler.
205+
let mut digits = [0u8; N];
206+
let mut carry = [0u8; N];
207+
for i in 0..N {
208+
let s = v0[i] + v1[i];
209+
let c = u8::from(s >= 10);
210+
carry[i] = c;
211+
digits[i] = s - (c * 10);
212+
}
213+
214+
// Process the carry and digits sequentially in chunks of 8. This allows calculating the
215+
// indexes for all 8 items in a few u64 operations, avoiding dependencies between each
216+
// recipe.
217+
for (digits, carry) in digits.chunks_exact(8).zip(carry.chunks_exact(8)) {
218+
let slice = &mut self.recipes[self.recipes_len..self.recipes_len + 256];
219+
220+
let mut indexes = u64::from_le_bytes(carry.try_into().unwrap());
221+
// Each recipe after the first is offset by 1
222+
indexes += 0x0101_0101_0101_0100;
223+
// Bytewise prefix sum
224+
indexes += indexes << 8;
225+
indexes += indexes << 16;
226+
indexes += indexes << 32;
227+
228+
for (i, &d) in digits.iter().enumerate() {
229+
// Use a u8 index into a 256 long slice to remove bounds checks here, reducing
230+
// the number of bounds checks to 1 per 8 recipes.
231+
let idx = (indexes >> (i * 8)) as u8;
232+
slice[idx as usize] = d;
233+
}
234+
235+
self.recipes_len += (indexes >> 56) as usize + 1;
236+
}
237+
}
238+
239+
#[inline]
240+
fn search(&mut self) -> Option<usize> {
241+
// Search for the pattern in sliding 64-byte windows
242+
while self.searched + 64 + 5 < self.recipes_len {
243+
// Find matches of the first two bytes across the window.
244+
// This loop should be vectorized by the compiler.
245+
let mut candidates = [0u8; 64];
246+
let mut any_matches = false;
247+
for (i, (&a, &b)) in self.recipes[self.searched..self.searched + 64]
248+
.iter()
249+
.zip(&self.recipes[self.searched + 1..self.searched + 65])
250+
.enumerate()
251+
{
252+
let matches = (a == self.pattern[0]) & (b == self.pattern[1]);
253+
// wrapping_neg produces 0xFF for matches and 0x00 for non-matches, which matches
254+
// x86 SIMD comparison function semantics, slightly improving codegen.
255+
candidates[i] = u8::from(matches).wrapping_neg();
256+
any_matches |= matches;
257+
}
258+
259+
// If no matches were found, skip to the next window.
260+
// Do this before calculating the mask which can be expensive.
261+
if !any_matches {
262+
self.searched += 64;
263+
continue;
264+
}
265+
266+
let mut mask = candidates
267+
.iter()
268+
.enumerate()
269+
.fold(0u64, |acc, (i, &x)| acc | u64::from(x != 0) << i);
270+
271+
// For each bit in the mask, check if the full pattern matches.
272+
while mask != 0 {
273+
let m = mask.trailing_zeros() as usize;
274+
mask &= !(1 << m);
275+
276+
let index = self.searched + m;
277+
if self.recipes[index..index + 6] == self.pattern {
278+
return Some(index);
279+
}
280+
}
281+
282+
self.searched += 64;
283+
}
284+
285+
None
286+
}
287+
}
288+
289+
examples!(Day14 -> (&'static str, usize) [
290+
// Custom examples
291+
{input: "100100", part1: "8239938101", part2: 377203},
292+
{input: "123456", part1: "1371276618", part2: 450697},
293+
{input: "924933", part1: "0012267210", part2: 16462928},
294+
{input: "054274", part1: "6112136872", part2: 21000203},
295+
]);

crates/year2018/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ utils::year!(2018 => year2018, ${
1515
11 => day11::Day11,
1616
12 => day12::Day12,
1717
13 => day13::Day13,
18+
14 => day14::Day14,
1819
});

0 commit comments

Comments
 (0)