Skip to content

Commit a44ed5e

Browse files
committed
Optimize 2018 day 9 by using a fixed array
Replacing VecDeque and manually unrolling all operations gives another ~20% speedup
1 parent 1223ba7 commit a44ed5e

File tree

1 file changed

+69
-27
lines changed

1 file changed

+69
-27
lines changed

crates/year2018/src/day09.rs

Lines changed: 69 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::collections::VecDeque;
21
use utils::prelude::*;
32

43
/// Simulating a marble game.
@@ -12,7 +11,7 @@ impl Day09 {
1211
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
1312
let (players, marbles) = parser::number_range(1..=999)
1413
.with_suffix(" players; last marble is worth ")
15-
.then(parser::number_range(1..=99_999))
14+
.then(parser::number_range(23..=99_999))
1615
.with_suffix(" points")
1716
.parse_complete(input)?;
1817

@@ -30,42 +29,85 @@ impl Day09 {
3029
}
3130

3231
fn max_score(players: u32, marbles: u32) -> u64 {
33-
let mut circle = VecDeque::with_capacity(marbles as usize);
34-
circle.push_front(0u32);
32+
let batches = marbles / 23;
33+
34+
// Each batch does 23x pop_back, 7x push_back and 37x push_front, meaning the buffer only
35+
// grows towards the front. Allocate a vec large enough to avoid needing to wrap around.
36+
let len = batches as usize * 37;
37+
let mut circle = vec![0u32; len];
38+
39+
// Start with the first batch completed to ensure there are enough entries to pop
40+
circle[len - 22..].copy_from_slice(&[
41+
18, 4, 17, 8, 16, 0, 15, 7, 14, 3, 13, 6, 12, 1, 11, 22, 5, 21, 10, 20, 2, 19,
42+
]);
43+
let (mut head, mut tail) = (len - 22, len - 1);
3544
let mut scores = vec![0u64; players as usize];
45+
scores[(23 % players) as usize] += 32;
3646

37-
let batches = marbles / 23;
38-
for base in (0..23 * batches).step_by(23) {
47+
for base in (23..23 * batches).step_by(23) {
3948
// Equivalent to the following operations, which naively add 23 marbles while keeping
4049
// the current marble at the back of the deque:
4150
// 22x [push_front(pop_back), push_front(pop_back), push_back(i)]
4251
// 7x [push_back(pop_front)]
4352
// [pop_back]
44-
// By eliminating redundant pushes and pops the total number of operations per batch is
45-
// decreased from 125 to 67.
46-
let front = circle.pop_back().unwrap();
47-
circle.push_front(front);
4853

49-
for i in 1..=18 {
50-
let front = circle.pop_back().unwrap();
51-
circle.push_front(front);
52-
circle.push_front(base + i);
53-
}
54+
scores[((base + 23) % players) as usize] +=
55+
(base + 23) as u64 + circle[tail - 19] as u64;
5456

55-
let f1 = circle.pop_back().unwrap();
56-
let f2 = circle.pop_back().unwrap();
57-
let f3 = circle.pop_back().unwrap();
58-
let f4 = circle.pop_back().unwrap();
57+
let push_front = [
58+
base + 18,
59+
circle[tail - 18],
60+
base + 17,
61+
circle[tail - 17],
62+
base + 16,
63+
circle[tail - 16],
64+
base + 15,
65+
circle[tail - 15],
66+
base + 14,
67+
circle[tail - 14],
68+
base + 13,
69+
circle[tail - 13],
70+
base + 12,
71+
circle[tail - 12],
72+
base + 11,
73+
circle[tail - 11],
74+
base + 10,
75+
circle[tail - 10],
76+
base + 9,
77+
circle[tail - 9],
78+
base + 8,
79+
circle[tail - 8],
80+
base + 7,
81+
circle[tail - 7],
82+
base + 6,
83+
circle[tail - 6],
84+
base + 5,
85+
circle[tail - 5],
86+
base + 4,
87+
circle[tail - 4],
88+
base + 3,
89+
circle[tail - 3],
90+
base + 2,
91+
circle[tail - 2],
92+
base + 1,
93+
circle[tail - 1],
94+
circle[tail],
95+
];
96+
let push_back = [
97+
base + 22,
98+
circle[tail - 22],
99+
base + 21,
100+
circle[tail - 21],
101+
base + 20,
102+
circle[tail - 20],
103+
base + 19,
104+
];
59105

60-
circle.push_back(base + 22);
61-
circle.push_back(f4);
62-
circle.push_back(base + 21);
63-
circle.push_back(f3);
64-
circle.push_back(base + 20);
65-
circle.push_back(f2);
66-
circle.push_back(base + 19);
106+
circle[head - 37..head].copy_from_slice(&push_front);
107+
circle[tail - 22..tail - 15].copy_from_slice(&push_back);
67108

68-
scores[((base + 23) % players) as usize] += (base as u64 + 23) + (f1 as u64);
109+
head -= 37;
110+
tail -= 16;
69111
}
70112

71113
scores.iter().copied().max().unwrap()

0 commit comments

Comments
 (0)