11use crate :: { StepValue , CDF } ;
22use itertools:: Itertools ;
33use std:: { cmp:: Ordering , collections:: BinaryHeap , mem} ;
4+ use textplots:: { Chart , Plot , Shape } ;
45
56#[ derive( Debug , PartialEq , Default , Clone , Copy , serde:: Serialize , serde:: Deserialize ) ]
67pub enum CompactionMode {
@@ -46,6 +47,7 @@ pub(crate) fn compact(data: &mut Vec<(f32, f32)>, mode: CompactionMode, max_size
4647 idx : usize ,
4748 dist : f32 ,
4849 use_left : bool ,
50+ p : ( f32 , f32 ) ,
4951 }
5052 impl Eq for D { }
5153 impl PartialOrd for D {
@@ -61,11 +63,12 @@ pub(crate) fn compact(data: &mut Vec<(f32, f32)>, mode: CompactionMode, max_size
6163 . then_with ( || self . idx . cmp ( & other. idx ) )
6264 }
6365 }
64- let mk_d = |dist : f32 , idx : usize , use_left : bool | D {
66+ let mk_d = |dist : f32 , idx : usize , use_left : bool , p : ( f32 , f32 ) | D {
6567 bin : ( dist / granularity) as i16 ,
6668 idx,
6769 dist,
6870 use_left,
71+ p,
6972 } ;
7073
7174 // use a binary heap to pull the closest pairs, identifying them by their x coordinate and sorting them by the distance to their right neighbor.
@@ -83,24 +86,18 @@ pub(crate) fn compact(data: &mut Vec<(f32, f32)>, mode: CompactionMode, max_size
8386 } else {
8487 return None ;
8588 } ;
86- let dist = if use_left { c. 0 - b. 0 } else { b . 0 - a . 0 } ;
87- Some ( mk_d ( dist, idx + 1 , use_left) )
89+ let dist = c. 0 - b. 0 ;
90+ Some ( mk_d ( dist, idx + 1 , use_left, ( b . 0 , b . 1 ) ) )
8891 } )
8992 . collect :: < BinaryHeap < _ > > ( ) ;
9093
9194 let mut to_remove = data. len ( ) - max_size;
92- let mut last_bin = -1 ;
9395 while let Some ( d) = heap. pop ( ) {
94- if d. bin == last_bin {
95- last_bin = -1 ;
96- continue ;
97- } else {
98- last_bin = d. bin ;
99- }
10096 // skip points that have already been removed
10197 if data[ d. idx ] . 1 < 0.0 {
10298 continue ;
10399 }
100+ assert_eq ! ( d. p, data[ d. idx] ) ;
104101
105102 // just remove this point, meaning that the left neighbour needs to be updated
106103 let mut neighbours = data[ ..d. idx ]
@@ -110,35 +107,47 @@ pub(crate) fn compact(data: &mut Vec<(f32, f32)>, mode: CompactionMode, max_size
110107 . filter_map ( |( i, ( _x, y) ) | ( * y >= 0.0 ) . then_some ( i) ) ;
111108
112109 if let Some ( neighbour) = neighbours. next ( ) {
110+ let new_data = if d. use_left {
111+ data[ neighbour]
112+ } else {
113+ ( data[ neighbour] . 0 , data[ d. idx ] . 1 )
114+ } ;
115+
113116 if let Some ( n2) = neighbours. next ( ) {
114117 // only push to heap if the next neighbour is in the opposite quadrant
115- if ( data[ n2] . 1 - data[ neighbour] . 1 ) * ( data[ neighbour] . 1 - data[ d. idx ] . 1 ) < = 0.0 {
118+ if ( data[ n2] . 1 - data[ neighbour] . 1 ) * ( data[ neighbour] . 1 - data[ d. idx ] . 1 ) > = 0.0 {
116119 heap. push ( mk_d (
117120 data[ d. idx ] . 0 - data[ neighbour] . 0 + d. dist ,
118121 d. idx ,
119122 d. use_left ,
123+ new_data,
120124 ) ) ;
121125 }
122126 }
127+
123128 // since we cannot remove the now changed neighbour from the heap, we mark it as removed instead
124129 // and move the neighbour to our position
125- if d. use_left {
126- data[ d. idx ] = data[ neighbour] ;
127- } else {
128- data[ d. idx ] . 0 = data[ neighbour] . 0 ;
129- }
130+ data[ d. idx ] = new_data;
130131 data[ neighbour] . 1 = -1.0 ;
132+
133+ // left in as a debugging tool to visualise the compaction process
134+ // let curr_cdf = data
135+ // .iter()
136+ // .filter(|(_, y)| *y >= 0.0)
137+ // .map(|(x, y)| (*x, *y))
138+ // .collect::<CDF>();
139+ // eprintln!("{curr_cdf}");
140+ // eprintln!("{}", plot_cdf([&orig_cdf, &curr_cdf]));
141+ // std::io::stdin().read_line(&mut String::new()).unwrap();
142+
143+ to_remove -= 1 ;
131144 }
132145
133- to_remove -= 1 ;
134146 if to_remove == 0 {
135147 break ;
136148 }
137149 }
138150 data. retain ( |x| x. 1 >= 0.0 ) ;
139-
140- // skipping every other occurrence of the same bin may end up draining the heap, so check whether we need to run a second pass
141- compact ( data, mode, max_size) ;
142151}
143152
144153pub ( crate ) fn compact_cdf ( data : & mut Vec < ( f32 , CDF ) > , mode : CompactionMode , max_size : usize ) {
@@ -221,3 +230,22 @@ fn quantise(cdf: CDF) -> CDF {
221230 } )
222231 . collect ( )
223232}
233+
234+ #[ allow( dead_code) ]
235+ fn plot_cdf < ' a > ( cdf : impl IntoIterator < Item = & ' a CDF > + ' a ) -> String {
236+ let mut iter = cdf. into_iter ( ) . peekable ( ) ;
237+ let mut chart = Chart :: new ( 100 , 60 , 0.0 , iter. peek ( ) . unwrap ( ) . width ( ) * 1.1 ) ;
238+ let shapes = iter
239+ . map ( |cdf| {
240+ eprintln ! ( "{:?}" , cdf. steps( ) . data( ) ) ;
241+ Shape :: Points ( & cdf. steps ( ) . data ( ) )
242+ } )
243+ . collect :: < Vec < _ > > ( ) ;
244+ let mut chart = & mut chart;
245+ for shape in & shapes {
246+ chart = chart. lineplot ( shape) ;
247+ }
248+ chart. axis ( ) ;
249+ chart. figures ( ) ;
250+ format ! ( "{}" , chart)
251+ }
0 commit comments