Skip to content

Commit c697f69

Browse files
committed
Use Earley parser in 2015 day 19 part 2
This ensures the molecule can actually be created from the provided rules and is still fast (<1ms locally).
1 parent 6ab3671 commit c697f69

File tree

6 files changed

+241
-41
lines changed

6 files changed

+241
-41
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ edition = "2024"
1010
license = "MIT"
1111
publish = false
1212
repository = "https://github.com/ictrobot/aoc-rs"
13-
rust-version = "1.85.0"
13+
rust-version = "1.86.0"
1414

1515
[workspace.lints.clippy]
1616
pedantic = { level = "warn", priority = -1 }

crates/utils/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ pub use wasm::multithreading;
2727
pub mod prelude {
2828
pub use crate::examples;
2929
pub use crate::input::{InputError, InputType, MapWithInputExt as _};
30-
pub use crate::parser::{self, Parser as _};
30+
pub use crate::parser::{self, Parseable as _, Parser as _};
3131
}

crates/utils/src/parser/macros.rs

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
1010
/// literals separated by commas, over 2x faster.
1111
///
12+
/// See also [`parser::parsable_enum!`](crate::parser::parsable_enum), a macro that lets you
13+
/// define an enum and literal parser together.
14+
///
1215
/// # Examples
1316
/// ```
1417
/// # use utils::parser::{Parser, self};
15-
///
1618
/// #[derive(Debug, PartialEq)]
1719
/// enum Example {
1820
/// A,
@@ -36,7 +38,7 @@ macro_rules! parser_literal_map {
3638
(
3739
$($($l:literal)|+ => $e:expr),+$(,)?
3840
) => {{
39-
fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }
41+
const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }
4042

4143
coerce_to_parser(|input| {
4244
$($(
@@ -56,6 +58,58 @@ macro_rules! parser_literal_map {
5658
};
5759
}
5860

61+
/// Macro to define an enum that implements [`Parseable`](crate::parser::Parseable).
62+
///
63+
/// The parser is implemented using [`parser::literal_map!`](crate::parser::literal_map).
64+
///
65+
/// # Examples
66+
/// ```
67+
/// # use utils::parser::{Parser, Parseable, self};
68+
/// parser::parsable_enum! {
69+
/// #[derive(Debug, PartialEq, Default)]
70+
/// enum Direction {
71+
/// #[default]
72+
/// "north" | "n" => North,
73+
/// "south" | "s" => South,
74+
/// "east" | "e" => East,
75+
/// "west" | "w" => West,
76+
/// }
77+
/// }
78+
///
79+
/// assert_eq!(Direction::PARSER.parse(b"north"), Ok((Direction::North, &b""[..])));
80+
/// assert_eq!(Direction::PARSER.parse(b"s"), Ok((Direction::South, &b""[..])));
81+
/// assert!(Direction::PARSER.parse(b"a").is_err());
82+
/// ```
83+
#[macro_export]
84+
macro_rules! parser_parsable_enum {
85+
(
86+
$(#[$enum_meta:meta])*
87+
enum $name:ident {$(
88+
$(#[$meta:meta])*
89+
$($l:literal)|+ => $variant:ident $(= $value:expr)?,
90+
)+}
91+
) => {
92+
$(#[$enum_meta])*
93+
pub enum $name {$(
94+
$(#[$meta])*
95+
$variant $(= $value)?,
96+
)+}
97+
98+
impl $name {
99+
const ALL: &'static [$name] = &[$(
100+
Self::$variant,
101+
)+];
102+
}
103+
104+
impl $crate::parser::Parseable for $name {
105+
type Parser = for<'a> fn(&'a [u8]) -> $crate::parser::ParseResult<'a, Self>;
106+
const PARSER: Self::Parser = $crate::parser_literal_map!($(
107+
$($l)|+ => Self::$variant,
108+
)+);
109+
}
110+
};
111+
}
112+
59113
/// Macro to define a custom parser using a `match` inspired parse tree syntax.
60114
///
61115
/// Each rule is made up of a list of chained parsers enclosed in brackets on the left-hand side.
@@ -203,7 +257,7 @@ macro_rules! parser_parse_tree {
203257

204258
// Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
205259
(($($first:tt)+) $($tail:tt)+) => {{
206-
fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }
260+
const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }
207261

208262
coerce_to_parser(|input| {
209263
let mut furthest_err = $crate::parser::ParseError::Custom("unreachable");

crates/utils/src/parser/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ pub use one_of::one_of;
1818
pub use simple::{byte, byte_range, constant, eof, eol, noop, take_while, take_while1};
1919

2020
pub use crate::parser_literal_map as literal_map;
21+
pub use crate::parser_parsable_enum as parsable_enum;
2122
pub use crate::parser_parse_tree as parse_tree;

crates/year2015/src/day19.rs

Lines changed: 180 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,222 @@
11
use std::collections::HashSet;
2+
use utils::array::ArrayVec;
23
use utils::prelude::*;
34

45
/// Molecule string replacements.
56
///
6-
/// Part 2 assumes there is only one possible number of steps, and that the replacements are always
7-
/// the same length or longer.
7+
/// Part 2 assumes there is only one possible number of steps but does not assume the `Rn` `Y` `Ar`
8+
/// bracket structure or rely on a closed-form step-count formula. Instead, it uses an optimized
9+
/// [Earley parser](https://en.wikipedia.org/wiki/Earley_parser), which ensures the molecule can be
10+
/// created from the provided rules.
811
#[derive(Clone, Debug)]
9-
pub struct Day19<'a> {
10-
replacements: Vec<(&'a [u8], &'a [u8])>,
11-
molecule: &'a [u8],
12+
pub struct Day19 {
13+
rules: Vec<(Option<Atom>, ArrayVec<Atom, 8>)>,
14+
molecule: Vec<Atom>,
1215
}
1316

14-
impl<'a> Day19<'a> {
15-
pub fn new(input: &'a str, _: InputType) -> Result<Self, InputError> {
16-
let Some((replacements, molecule)) = input.rsplit_once("\n\n") else {
17+
parser::parsable_enum! {
18+
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
19+
#[repr(u8)]
20+
enum Atom {
21+
#[default]
22+
"Al" => Al,
23+
"Ar" => Ar,
24+
"B" => B,
25+
"Ca" => Ca,
26+
"C" => C,
27+
"F" => F,
28+
"H" => H,
29+
"Mg" => Mg,
30+
"N" => N,
31+
"O" => O,
32+
"P" => P,
33+
"Rn" => Rn,
34+
"Si" => Si,
35+
"Th" => Th,
36+
"Ti" => Ti,
37+
"Y" => Y,
38+
}
39+
}
40+
41+
const _: () = {
42+
assert!(Atom::ALL.len() <= 16);
43+
};
44+
45+
impl Day19 {
46+
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
47+
let Some((rules_str, molecule)) = input.rsplit_once("\n\n") else {
1748
return Err(InputError::new(
1849
input,
1950
0,
20-
"expected replacements then a blank line then the molecule",
51+
"expected rules then a blank line then the molecule",
2152
));
2253
};
2354

55+
let rules = Atom::PARSER
56+
.map(Some)
57+
.or(b'e'.map(|_| None))
58+
.with_suffix(" => ")
59+
.then(Atom::PARSER.repeat_arrayvec(parser::noop(), 1))
60+
.parse_lines(rules_str)?;
61+
62+
if rules.len() > 64 {
63+
return Err(InputError::new(input, rules_str.len(), "too many rules"));
64+
}
65+
2466
Ok(Self {
25-
replacements: parser::take_while1(u8::is_ascii_alphabetic)
26-
.then(parser::take_while1(u8::is_ascii_alphabetic).with_prefix(" => "))
27-
.parse_lines(replacements)?,
28-
molecule: molecule.trim_ascii_end().as_bytes(),
67+
rules,
68+
molecule: Atom::PARSER.parse_all(molecule)?,
2969
})
3070
}
3171

3272
#[must_use]
3373
pub fn part1(&self) -> usize {
3474
let mut set = HashSet::new();
35-
for &(from, to) in &self.replacements {
36-
let new_length = self.molecule.len() - from.len() + to.len();
75+
for (from, to) in &self.rules {
76+
let Some(from) = *from else { continue };
77+
let new_length = self.molecule.len() + to.len() - 1;
3778
for i in 0..self.molecule.len() {
38-
if self.molecule[i..].starts_with(from) {
79+
if self.molecule[i] == from {
3980
let mut molecule = Vec::with_capacity(new_length);
4081
molecule.extend_from_slice(&self.molecule[..i]);
4182
molecule.extend_from_slice(to);
42-
molecule.extend_from_slice(&self.molecule[i + from.len()..]);
43-
set.insert(molecule);
83+
molecule.extend_from_slice(&self.molecule[i + 1..]);
84+
85+
// `.into_iter().map(|x| x as u8).collect::<Vec<_>>()` makes this function 2-3x
86+
// faster as the std::hash::Hash implementation for u8 implements hash_slice
87+
// efficiently using a single call to write, and the into_iter-map-collect chain
88+
// is a no-op. It isn't possible to implement Hash::hash_slice for Atom as
89+
// efficiently without unsafe code / transmute.
90+
set.insert(molecule.into_iter().map(|x| x as u8).collect::<Vec<_>>());
4491
}
4592
}
4693
}
4794
set.len()
4895
}
4996

5097
#[must_use]
51-
pub fn part2(&self) -> u64 {
52-
let mut molecule = self.molecule.to_vec();
53-
let mut steps = 0;
54-
while molecule.iter().any(|&x| x != b'e') {
55-
for &(from, to) in &self.replacements {
56-
let mut i = 0;
57-
while i < molecule.len() {
58-
if molecule[i..].starts_with(to) {
59-
// Replace to with from, presuming from.len() <= to.len()
60-
molecule[i..i + from.len()].copy_from_slice(from);
61-
molecule.drain(i + from.len()..i + to.len());
62-
63-
steps += 1;
64-
} else {
65-
i += 1;
98+
pub fn part2(&self) -> u32 {
99+
#[derive(Copy, Clone, Debug)]
100+
struct State {
101+
rule: usize,
102+
dot: usize,
103+
origin: usize,
104+
}
105+
106+
// Store the chart as a list of state lists at each position, plus a bitset for the current
107+
// and next positions. This works well as only the current and next position sets are ever
108+
// updated, and the bitset makes duplicate checking fast. Previous sets are only ever
109+
// iterated over. The current list is also reused as a queue of states to process.
110+
let mut chart = vec![Vec::new(); self.molecule.len() + 1];
111+
112+
// Indexed by bitset[origin][dot] & (1 << rule):
113+
// - 9 possible dot values (0-8 inclusive, enforced by ArrayVec N),
114+
// - 64 possible rules (checked in new).
115+
let mut current_bitset = vec![[0u64; 9]; self.molecule.len() + 1];
116+
let mut next_bitset = vec![[0u64; 9]; self.molecule.len() + 1];
117+
118+
// Preprocess the rules into separate lists by the LHS, populating e rules into the initial
119+
// set.
120+
let mut rules_by_lhs = vec![Vec::new(); 16];
121+
for (i, (lhs, _)) in self.rules.iter().enumerate() {
122+
if let Some(lhs) = *lhs {
123+
rules_by_lhs[lhs as usize].push(i);
124+
} else {
125+
let state = State {
126+
rule: i,
127+
dot: 0,
128+
origin: 0,
129+
};
130+
current_bitset[state.origin][state.dot] |= 1 << state.rule;
131+
chart[0].push((state, 1));
132+
}
133+
}
134+
135+
// Optimization: Only do predictions once per atom per position.
136+
let mut predictions_done = 0u16;
137+
// Optimization: Only do completions once per (origin, atom) per position.
138+
let mut completions_done = vec![0u16; self.molecule.len() + 1];
139+
140+
for pos in 0..chart.len() {
141+
let mut set_idx = 0;
142+
while let Some(&(state, steps)) = chart[pos].get(set_idx) {
143+
let (lhs, rhs) = &self.rules[state.rule];
144+
145+
if state.dot < rhs.len() {
146+
// Prediction
147+
if predictions_done & (1 << rhs[state.dot] as usize) == 0 {
148+
predictions_done |= 1 << rhs[state.dot] as usize;
149+
150+
for &i in &rules_by_lhs[rhs[state.dot] as usize] {
151+
let new = State {
152+
rule: i,
153+
dot: 0,
154+
origin: pos,
155+
};
156+
if current_bitset[new.origin][new.dot] & (1 << new.rule) == 0 {
157+
current_bitset[new.origin][new.dot] |= 1 << new.rule;
158+
chart[pos].push((new, 1));
159+
}
160+
}
66161
}
162+
163+
// Scanning
164+
if self.molecule.get(pos) == Some(&rhs[state.dot]) {
165+
let new = State {
166+
rule: state.rule,
167+
dot: state.dot + 1,
168+
origin: state.origin,
169+
};
170+
if next_bitset[new.origin][new.dot] & (1 << new.rule) == 0 {
171+
next_bitset[new.origin][new.dot] |= 1 << new.rule;
172+
chart[pos + 1].push((new, steps));
173+
}
174+
}
175+
} else if let Some(lhs) = *lhs {
176+
// Completion
177+
if completions_done[state.origin] & (1 << lhs as usize) == 0 {
178+
completions_done[state.origin] |= 1 << lhs as usize;
179+
180+
let [current_chart, origin_chart] = chart
181+
.get_disjoint_mut([pos, state.origin])
182+
.expect("origin must be less than pos");
183+
184+
for (prev_state, prev_steps) in origin_chart.iter() {
185+
let (_, prev_rhs) = &self.rules[prev_state.rule];
186+
if prev_state.dot < prev_rhs.len() && prev_rhs[prev_state.dot] == lhs {
187+
let new = State {
188+
rule: prev_state.rule,
189+
dot: prev_state.dot + 1,
190+
origin: prev_state.origin,
191+
};
192+
if current_bitset[new.origin][new.dot] & (1 << new.rule) == 0 {
193+
current_bitset[new.origin][new.dot] |= 1 << new.rule;
194+
current_chart.push((new, steps + prev_steps));
195+
}
196+
}
197+
}
198+
}
199+
} else if pos == self.molecule.len() {
200+
// Completion of a start rule consuming the entire molecule
201+
return steps;
67202
}
203+
204+
set_idx += 1;
68205
}
206+
207+
(current_bitset, next_bitset) = (next_bitset, current_bitset);
208+
next_bitset[..=pos].fill([0; 9]);
209+
210+
// Reset optimization caches for the next position
211+
predictions_done = 0u16;
212+
completions_done[..pos].fill(0);
69213
}
70-
steps
214+
215+
panic!("no solution found");
71216
}
72217
}
73218

74-
examples!(Day19<'_> -> (usize, u64) [
219+
examples!(Day19 -> (usize, u32) [
75220
{input: "H => HO\nH => OH\nO => HH\n\nHOH", part1: 4},
76221
{input: "e => H\ne => O\nH => HO\nH => OH\nO => HH\n\nHOH", part2: 3},
77222
{input: "e => H\ne => O\nH => HO\nH => OH\nO => HH\n\nHOHOHO", part2: 6},

crates/year2015/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ utils::year!(2015 => year2015, ${
2020
16 => day16::Day16<'_>,
2121
17 => day17::Day17,
2222
18 => day18::Day18,
23-
19 => day19::Day19<'_>,
23+
19 => day19::Day19,
2424
20 => day20::Day20,
2525
21 => day21::Day21,
2626
22 => day22::Day22,

0 commit comments

Comments
 (0)