Skip to content

Commit 11b7fc3

Browse files
committed
Optimize 2018 day 11 further
Rewrite largest_total_power to allow vectorization. The solution now runs in ~630µs on my system, ~6x faster than the last commit and ~11x faster than the original solution.
1 parent: 7a154e2; commit: 11b7fc3

File tree

1 file changed

+24
-10
lines changed

1 file changed

+24
-10
lines changed

crates/year2018/src/day11.rs

Lines changed: 24 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -68,17 +68,31 @@ impl Day11 {
6868

6969
fn largest_total_power(&self, size: usize) -> (i32, u32, u32) {
7070
let (mut max_total, mut max_x, mut max_y) = (i32::MIN, 0, 0);
71+
let mut row_totals = [0; 301];
72+
7173
for y in 0..301 - size {
72-
for x in 0..301 - size {
73-
let index = y * 301 + x;
74-
let total = self.summed_area_table[index]
75-
+ self.summed_area_table[index + 302 * size]
76-
- self.summed_area_table[index + size]
77-
- self.summed_area_table[index + 301 * size];
78-
if total > max_total {
79-
max_total = total;
80-
max_x = x as u32 + 1;
81-
max_y = y as u32 + 1;
74+
// Avoids bounds checks, allowing the inner loop to be vectorized
75+
let mut found_new_max = false;
76+
for ((((total, &top_left), &top_right), &bottom_left), &bottom_right) in row_totals
77+
[..301 - size]
78+
.iter_mut()
79+
.zip(self.summed_area_table[y * 301..].iter())
80+
.zip(self.summed_area_table[y * 301 + size..].iter())
81+
.zip(self.summed_area_table[(y + size) * 301..].iter())
82+
.zip(self.summed_area_table[(y + size) * 301 + size..].iter())
83+
{
84+
*total = top_left + bottom_right - top_right - bottom_left;
85+
found_new_max |= *total > max_total;
86+
}
87+
88+
// Only perform scalar comparisons when a new max has been found
89+
if found_new_max {
90+
for (x, &total) in row_totals[..301 - size].iter().enumerate() {
91+
if total > max_total {
92+
max_total = total;
93+
max_x = x as u32 + 1;
94+
max_y = y as u32 + 1;
95+
}
8296
}
8397
}
8498
}

0 commit comments

Comments
 (0)