11//! # Rambunctious Recitation
22//!
3- //! Hybrid solution that uses both a `vec` and [`FastMap`] to store previously seen values .
4- //! This approach is faster than using either data structure alone. The threshold is chosen so that
5- //! about 85% of values are stored in the `vec` .
3+ //! For efficiency the `vec` storing the turn on which each number was last spoken is `u32`.
4+ //! Each stored turn is at least one so zero is used as a special value to indicate numbers not
5+ //! seen before.
66//!
7- //! To save space the `vec` is `u32` instead of `usize`. Each difference is at least one so we can
8- //! use zero as a special value to indicate numbers not seen before.
7+ //! To speed things up even more, we notice that most large numbers over a certain threshold are
8+ //! spoken only once. Storing if numbers have been seen before in a compact bitset prevents
9+ //! expensive reads to main memory and halves the time needed for the solution.
910//!
10- //! Accessing the map uses the [`Entry`] method as this reduces two key lookups to one.
11- //!
12- //! [`FastMap`]: crate::util::hash
13- //! [`Entry`]: std::collections::hash_map::Entry
14- use crate :: util:: hash:: * ;
11+ //! Zero occurs the most so storing it as a dedicated variable saves another 2% of execution time.
1512use crate :: util:: parse:: * ;
1613
17- const THRESHOLD : usize = 1_000_000 ;
14+ const THRESHOLD : usize = 0x10000 ;
1815
1916pub fn parse ( input : & str ) -> Vec < usize > {
2017 input. iter_unsigned ( ) . collect ( )
@@ -30,31 +27,48 @@ pub fn part2(input: &[usize]) -> usize {
3027
3128fn play ( input : & [ usize ] , rounds : usize ) -> usize {
3229 let size = input. len ( ) - 1 ;
33- let mut last = input[ size] ;
3430
35- let mut spoken_low = vec ! [ 0 ; rounds. min( THRESHOLD ) ] ;
36- let mut spoken_high = FastMap :: with_capacity ( rounds / 5 ) ;
31+ let mut last = input[ size] ;
32+ let mut zeroth = 0 ;
33+ let mut spoken = vec ! [ 0 ; rounds] ;
34+ let mut seen = vec ! [ 0_u64 ; rounds / 64 ] ;
3735
3836 for i in 0 ..size {
39- spoken_low[ input[ i] ] = ( i + 1 ) as u32 ;
37+ if input[ i] == 0 {
38+ zeroth = i + 1 ;
39+ } else {
40+ spoken[ input[ i] ] = ( i + 1 ) as u32 ;
41+ }
4042 }
4143
4244 for i in input. len ( ) ..rounds {
43- if last < THRESHOLD {
44- let previous = spoken_low[ last] as usize ;
45- spoken_low[ last] = i as u32 ;
45+ if last == 0 {
46+ // Handle zero specially as it occurs the most.
47+ let previous = zeroth;
48+ zeroth = i;
49+ last = if previous == 0 { 0 } else { i - previous } ;
50+ } else if last < THRESHOLD {
51+ // Smaller numbers occur frequently so skip previously seen bitset check.
52+ let previous = spoken[ last] as usize ;
53+ spoken[ last] = i as u32 ;
4654 last = if previous == 0 { 0 } else { i - previous } ;
4755 } else {
48- spoken_high
49- . entry ( last as u32 )
50- . and_modify ( |previous| {
51- last = i - * previous as usize ;
52- * previous = i as u32 ;
53- } )
54- . or_insert_with ( || {
55- last = 0 ;
56- i as u32
57- } ) ;
56+ // An array of 30 million `u32`s needs 120 MB of memory which exceeds most caches.
57+ // Writing and reading to random locations in this large array goes to main memory
58+ // which is slow. Store if a number has been seen before in a compact bitset,
59+ // needing only a more cache friendly 4 MB.
60+ let base = last / 64 ;
61+ let mask = 1 << ( last % 64 ) ;
62+
63+ if seen[ base] & mask == 0 {
64+ seen[ base] |= mask;
65+ spoken[ last] = i as u32 ;
66+ last = 0 ;
67+ } else {
68+ let previous = spoken[ last] as usize ;
69+ spoken[ last] = i as u32 ;
70+ last = i - previous;
71+ }
5872 }
5973 }
6074
0 commit comments