Skip to content

Commit f4491b5

Browse files
committed
Optimize 2024 day 4
Rewriting the loops to allow vectorization reduces runtime from ~180µs to ~60µs, or to ~40µs when compiled with `-C target-cpu=native`.
1 parent 33a393e commit f4491b5

File tree

1 file changed

+32
-52
lines changed

1 file changed

+32
-52
lines changed

crates/year2024/src/day04.rs

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -4,80 +4,60 @@ use utils::prelude::*;
44
/// Counting matches in a word search.
55
#[derive(Clone, Debug)]
66
pub struct Day04 {
7-
rows: usize,
87
cols: usize,
98
grid: Vec<u8>,
109
}
1110

1211
impl Day04 {
1312
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
14-
let (rows, cols, grid) = grid::from_str_padded(input, 3, b'\0', |c| match c {
13+
let (_, cols, grid) = grid::from_str_padded(input, 3, b'\0', |c| match c {
1514
b'X' | b'M' | b'A' | b'S' => Some(c),
1615
_ => None,
1716
})?;
18-
Ok(Self { rows, cols, grid })
17+
Ok(Self { cols, grid })
1918
}
2019

2120
#[must_use]
2221
pub fn part1(&self) -> u32 {
23-
let offsets = [
24-
self.cols as isize,
25-
-(self.cols as isize),
26-
1,
27-
-1,
28-
(self.cols as isize) + 1,
29-
(self.cols as isize) - 1,
30-
-(self.cols as isize) + 1,
31-
-(self.cols as isize) - 1,
32-
];
22+
self.check_offset(self.cols as isize)
23+
+ self.check_offset(-(self.cols as isize))
24+
+ self.check_offset(1)
25+
+ self.check_offset(-1)
26+
+ self.check_offset((self.cols as isize) + 1)
27+
+ self.check_offset((self.cols as isize) - 1)
28+
+ self.check_offset(-(self.cols as isize) + 1)
29+
+ self.check_offset(-(self.cols as isize) - 1)
30+
}
3331

32+
fn check_offset(&self, offset: isize) -> u32 {
33+
let start = 3 * self.cols + 3;
3434
let mut count = 0;
35-
for r in 3..self.rows - 3 {
36-
for c in 3..self.cols - 3 {
37-
let i = r * self.cols + c;
38-
if self.grid[i] != b'X' {
39-
continue;
40-
}
41-
42-
for o in offsets {
43-
if self.grid[i.wrapping_add_signed(o)] == b'M'
44-
&& self.grid[i.wrapping_add_signed(o * 2)] == b'A'
45-
&& self.grid[i.wrapping_add_signed(o * 3)] == b'S'
46-
{
47-
count += 1;
48-
}
49-
}
50-
}
35+
for (((first, second), third), fourth) in self.grid[start..]
36+
.iter()
37+
.zip(&self.grid[start.wrapping_add_signed(offset)..])
38+
.zip(&self.grid[start.wrapping_add_signed(offset * 2)..])
39+
.zip(&self.grid[start.wrapping_add_signed(offset * 3)..])
40+
{
41+
count += u32::from(
42+
(*first == b'X') & (*second == b'M') & (*third == b'A') & (*fourth == b'S'),
43+
);
5144
}
5245
count
5346
}
5447

5548
#[must_use]
5649
pub fn part2(&self) -> u32 {
5750
let mut count = 0;
58-
for r in 4..self.rows - 4 {
59-
for c in 4..self.cols - 4 {
60-
let i = r * self.cols + c;
61-
if self.grid[i] != b'A' {
62-
continue;
63-
}
64-
65-
let (nw, ne, sw, se) = (
66-
self.grid[i.wrapping_add_signed(-(self.cols as isize) - 1)],
67-
self.grid[i.wrapping_add_signed(-(self.cols as isize) + 1)],
68-
self.grid[i.wrapping_add_signed((self.cols as isize) - 1)],
69-
self.grid[i.wrapping_add_signed((self.cols as isize) + 1)],
70-
);
71-
72-
// Given each variable is one of (b'\0', b'X', b'M', b'A', b'S') this is
73-
// equivalent to and slightly faster than
74-
// ((nw == b'M' && se == b'S') || (nw == b'S' && se == b'M'))
75-
// && ((ne == b'M' && sw == b'S') || (ne == b'S' && sw == b'M'))
76-
// As no other pair XORed equals b'M' ^ b'S'
77-
if (nw ^ se) == (b'M' ^ b'S') && (ne ^ sw) == (b'M' ^ b'S') {
78-
count += 1;
79-
}
80-
}
51+
for ((((middle, nw), ne), sw), se) in self.grid[self.cols * 4 + 4..]
52+
.iter()
53+
.zip(&self.grid[self.cols * 3 + 3..])
54+
.zip(&self.grid[self.cols * 3 + 5..])
55+
.zip(&self.grid[self.cols * 5 + 3..])
56+
.zip(&self.grid[self.cols * 5 + 5..])
57+
{
58+
count += u32::from(
59+
(*middle == b'A') & ((*nw ^ *se) == (b'M' ^ b'S')) & ((*ne ^ *sw) == (b'M' ^ b'S')),
60+
);
8161
}
8262
count
8363
}

0 commit comments

Comments
 (0)