|
1 | 1 | use std::collections::HashSet; |
| 2 | +use utils::array::ArrayVec; |
2 | 3 | use utils::prelude::*; |
3 | 4 |
|
4 | 5 | /// Molecule string replacements. |
5 | 6 | /// |
6 | | -/// Part 2 assumes there is only one possible number of steps, and that the replacements are always |
7 | | -/// the same length or longer. |
| 7 | +/// Part 2 assumes there is only one possible number of steps but does not assume the `Rn` `Y` `Ar` |
| 8 | +/// bracket structure or use the formula. Instead, it uses an optimized |
| 9 | +/// [Earley parser](https://en.wikipedia.org/wiki/Earley_parser), which ensures the molecule can be |
| 10 | +/// created from the provided rules. |
8 | 11 | #[derive(Clone, Debug)] |
9 | | -pub struct Day19<'a> { |
10 | | - replacements: Vec<(&'a [u8], &'a [u8])>, |
11 | | - molecule: &'a [u8], |
| 12 | +pub struct Day19 { |
| 13 | + rules: Vec<(Option<Atom>, ArrayVec<Atom, 8>)>, |
| 14 | + molecule: Vec<Atom>, |
12 | 15 | } |
13 | 16 |
|
14 | | -impl<'a> Day19<'a> { |
15 | | - pub fn new(input: &'a str, _: InputType) -> Result<Self, InputError> { |
16 | | - let Some((replacements, molecule)) = input.rsplit_once("\n\n") else { |
| 17 | +parser::parsable_enum! { |
| 18 | + #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)] |
| 19 | + #[repr(u8)] |
| 20 | + enum Atom { |
| 21 | + #[default] |
| 22 | + "Al" => Al, |
| 23 | + "Ar" => Ar, |
| 24 | + "B" => B, |
| 25 | + "Ca" => Ca, |
| 26 | + "C" => C, |
| 27 | + "F" => F, |
| 28 | + "H" => H, |
| 29 | + "Mg" => Mg, |
| 30 | + "N" => N, |
| 31 | + "O" => O, |
| 32 | + "P" => P, |
| 33 | + "Rn" => Rn, |
| 34 | + "Si" => Si, |
| 35 | + "Th" => Th, |
| 36 | + "Ti" => Ti, |
| 37 | + "Y" => Y, |
| 38 | + } |
| 39 | +} |
| 40 | + |
| 41 | +const _: () = { |
| 42 | + assert!(Atom::ALL.len() <= 16); |
| 43 | +}; |
| 44 | + |
| 45 | +impl Day19 { |
| 46 | + pub fn new(input: &str, _: InputType) -> Result<Self, InputError> { |
| 47 | + let Some((rules_str, molecule)) = input.rsplit_once("\n\n") else { |
17 | 48 | return Err(InputError::new( |
18 | 49 | input, |
19 | 50 | 0, |
20 | | - "expected replacements then a blank line then the molecule", |
| 51 | + "expected rules then a blank line then the molecule", |
21 | 52 | )); |
22 | 53 | }; |
23 | 54 |
|
| 55 | + let rules = Atom::PARSER |
| 56 | + .map(Some) |
| 57 | + .or(b'e'.map(|_| None)) |
| 58 | + .with_suffix(" => ") |
| 59 | + .then(Atom::PARSER.repeat_arrayvec(parser::noop(), 1)) |
| 60 | + .parse_lines(rules_str)?; |
| 61 | + |
| 62 | + if rules.len() > 64 { |
| 63 | + return Err(InputError::new(input, rules_str.len(), "too many rules")); |
| 64 | + } |
| 65 | + |
24 | 66 | Ok(Self { |
25 | | - replacements: parser::take_while1(u8::is_ascii_alphabetic) |
26 | | - .then(parser::take_while1(u8::is_ascii_alphabetic).with_prefix(" => ")) |
27 | | - .parse_lines(replacements)?, |
28 | | - molecule: molecule.trim_ascii_end().as_bytes(), |
| 67 | + rules, |
| 68 | + molecule: Atom::PARSER.parse_all(molecule)?, |
29 | 69 | }) |
30 | 70 | } |
31 | 71 |
|
32 | 72 | #[must_use] |
33 | 73 | pub fn part1(&self) -> usize { |
34 | 74 | let mut set = HashSet::new(); |
35 | | - for &(from, to) in &self.replacements { |
36 | | - let new_length = self.molecule.len() - from.len() + to.len(); |
| 75 | + for (from, to) in &self.rules { |
| 76 | + let Some(from) = *from else { continue }; |
| 77 | + let new_length = self.molecule.len() + to.len() - 1; |
37 | 78 | for i in 0..self.molecule.len() { |
38 | | - if self.molecule[i..].starts_with(from) { |
| 79 | + if self.molecule[i] == from { |
39 | 80 | let mut molecule = Vec::with_capacity(new_length); |
40 | 81 | molecule.extend_from_slice(&self.molecule[..i]); |
41 | 82 | molecule.extend_from_slice(to); |
42 | | - molecule.extend_from_slice(&self.molecule[i + from.len()..]); |
43 | | - set.insert(molecule); |
| 83 | + molecule.extend_from_slice(&self.molecule[i + 1..]); |
| 84 | + |
| 85 | + // `.into_iter().map(|x| x as u8).collect::<Vec<_>>()` makes this function 2-3x |
| 86 | + // faster as the std::hash::Hash implementation for u8 implements hash_slice |
| 87 | + // efficiently using a single call to write, and the into_iter-map-collect chain |
| 88 | + // is a no-op. It isn't possible to implement Hash::hash_slice for Atom so |
| 89 | + // efficiently without unsafe code / transmute. |
| 90 | + set.insert(molecule.into_iter().map(|x| x as u8).collect::<Vec<_>>()); |
44 | 91 | } |
45 | 92 | } |
46 | 93 | } |
47 | 94 | set.len() |
48 | 95 | } |
49 | 96 |
|
50 | 97 | #[must_use] |
51 | | - pub fn part2(&self) -> u64 { |
52 | | - let mut molecule = self.molecule.to_vec(); |
53 | | - let mut steps = 0; |
54 | | - while molecule.iter().any(|&x| x != b'e') { |
55 | | - for &(from, to) in &self.replacements { |
56 | | - let mut i = 0; |
57 | | - while i < molecule.len() { |
58 | | - if molecule[i..].starts_with(to) { |
59 | | - // Replace to with from, presuming from.len() <= to.len() |
60 | | - molecule[i..i + from.len()].copy_from_slice(from); |
61 | | - molecule.drain(i + from.len()..i + to.len()); |
62 | | - |
63 | | - steps += 1; |
64 | | - } else { |
65 | | - i += 1; |
| 98 | + pub fn part2(&self) -> u32 { |
| 99 | + #[derive(Copy, Clone, Debug)] |
| 100 | + struct State { |
| 101 | + rule: usize, |
| 102 | + dot: usize, |
| 103 | + origin: usize, |
| 104 | + } |
| 105 | + |
| 106 | + // Store the chart as a list of state lists at each position, plus a bitset for the current |
| 107 | + // and next positions. This works well as only the current and next position sets are ever |
| 108 | + // updated, and the bitset makes duplicate checking fast. Previous sets are only ever |
| 109 | + // iterated over. The current list is also reused as a queue of states to process. |
| 110 | + let mut chart = vec![Vec::new(); self.molecule.len() + 1]; |
| 111 | + |
| 112 | + // Indexed by bitset[origin][dot] & (1 << rule): |
| 113 | + // - 9 possible dot values (0-8 inclusive, enforced by ArrayVec N), |
| 114 | + // - 64 possible rules (checked in new). |
| 115 | + let mut current_bitset = vec![[0u64; 9]; self.molecule.len() + 1]; |
| 116 | + let mut next_bitset = vec![[0u64; 9]; self.molecule.len() + 1]; |
| 117 | + |
| 118 | + // Preprocess the rules into separate lists by the LHS, populating e rules into the initial |
| 119 | + // set. |
| 120 | + let mut rules_by_lhs = vec![Vec::new(); 16]; |
| 121 | + for (i, (lhs, _)) in self.rules.iter().enumerate() { |
| 122 | + if let Some(lhs) = *lhs { |
| 123 | + rules_by_lhs[lhs as usize].push(i); |
| 124 | + } else { |
| 125 | + let state = State { |
| 126 | + rule: i, |
| 127 | + dot: 0, |
| 128 | + origin: 0, |
| 129 | + }; |
| 130 | + current_bitset[state.origin][state.dot] |= 1 << state.rule; |
| 131 | + chart[0].push((state, 1)); |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + // Optimization: Only do predictions once per atom per position. |
| 136 | + let mut predictions_done = 0u16; |
| 137 | + // Optimization: Only do completions once per (origin, atom) per position. |
| 138 | + let mut completions_done = vec![0u16; self.molecule.len() + 1]; |
| 139 | + |
| 140 | + for pos in 0..chart.len() { |
| 141 | + let mut set_idx = 0; |
| 142 | + while let Some(&(state, steps)) = chart[pos].get(set_idx) { |
| 143 | + let (lhs, rhs) = &self.rules[state.rule]; |
| 144 | + |
| 145 | + if state.dot < rhs.len() { |
| 146 | + // Prediction |
| 147 | + if predictions_done & (1 << rhs[state.dot] as usize) == 0 { |
| 148 | + predictions_done |= 1 << rhs[state.dot] as usize; |
| 149 | + |
| 150 | + for &i in &rules_by_lhs[rhs[state.dot] as usize] { |
| 151 | + let new = State { |
| 152 | + rule: i, |
| 153 | + dot: 0, |
| 154 | + origin: pos, |
| 155 | + }; |
| 156 | + if current_bitset[new.origin][new.dot] & (1 << new.rule) == 0 { |
| 157 | + current_bitset[new.origin][new.dot] |= 1 << new.rule; |
| 158 | + chart[pos].push((new, 1)); |
| 159 | + } |
| 160 | + } |
66 | 161 | } |
| 162 | + |
| 163 | + // Scanning |
| 164 | + if self.molecule.get(pos) == Some(&rhs[state.dot]) { |
| 165 | + let new = State { |
| 166 | + rule: state.rule, |
| 167 | + dot: state.dot + 1, |
| 168 | + origin: state.origin, |
| 169 | + }; |
| 170 | + if next_bitset[new.origin][new.dot] & (1 << new.rule) == 0 { |
| 171 | + next_bitset[new.origin][new.dot] |= 1 << new.rule; |
| 172 | + chart[pos + 1].push((new, steps)); |
| 173 | + } |
| 174 | + } |
| 175 | + } else if let Some(lhs) = *lhs { |
| 176 | + // Completion |
| 177 | + if completions_done[state.origin] & (1 << lhs as usize) == 0 { |
| 178 | + completions_done[state.origin] |= 1 << lhs as usize; |
| 179 | + |
| 180 | + let [current_chart, origin_chart] = chart |
| 181 | + .get_disjoint_mut([pos, state.origin]) |
| 182 | + .expect("origin must be less than pos"); |
| 183 | + |
| 184 | + for (prev_state, prev_steps) in origin_chart.iter() { |
| 185 | + let (_, prev_rhs) = &self.rules[prev_state.rule]; |
| 186 | + if prev_state.dot < prev_rhs.len() && prev_rhs[prev_state.dot] == lhs { |
| 187 | + let new = State { |
| 188 | + rule: prev_state.rule, |
| 189 | + dot: prev_state.dot + 1, |
| 190 | + origin: prev_state.origin, |
| 191 | + }; |
| 192 | + if current_bitset[new.origin][new.dot] & (1 << new.rule) == 0 { |
| 193 | + current_bitset[new.origin][new.dot] |= 1 << new.rule; |
| 194 | + current_chart.push((new, steps + prev_steps)); |
| 195 | + } |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | + } else if pos == self.molecule.len() { |
| 200 | + // Completion of a start rule consuming the entire molecule |
| 201 | + return steps; |
67 | 202 | } |
| 203 | + |
| 204 | + set_idx += 1; |
68 | 205 | } |
| 206 | + |
| 207 | + (current_bitset, next_bitset) = (next_bitset, current_bitset); |
| 208 | + next_bitset[..=pos].fill([0; 9]); |
| 209 | + |
| 210 | + // Reset optimization caches for the next position |
| 211 | + predictions_done = 0u16; |
| 212 | + completions_done[..pos].fill(0); |
69 | 213 | } |
70 | | - steps |
| 214 | + |
| 215 | + panic!("no solution found"); |
71 | 216 | } |
72 | 217 | } |
73 | 218 |
|
74 | | -examples!(Day19<'_> -> (usize, u64) [ |
| 219 | +examples!(Day19 -> (usize, u32) [ |
75 | 220 | {input: "H => HO\nH => OH\nO => HH\n\nHOH", part1: 4}, |
76 | 221 | {input: "e => H\ne => O\nH => HO\nH => OH\nO => HH\n\nHOH", part2: 3}, |
77 | 222 | {input: "e => H\ne => O\nH => HO\nH => OH\nO => HH\n\nHOHOHO", part2: 6}, |
|
0 commit comments