@@ -268,13 +268,35 @@ impl Statistics {
268268 return self ;
269269 } ;
270270
271- // todo: it would be nice to avoid cloning column statistics if
272- // possible (e.g. if the projection did not contain duplicates)
273- self . column_statistics = projection
274- . iter ( )
275- . map ( |& i| self . column_statistics [ i] . clone ( ) )
271+ enum Slot {
272+ /// The column is taken and put into the specified statistics location
273+ Taken ( usize ) ,
274+ /// The original column is still present and has not been moved yet
275+ Present ( ColumnStatistics ) ,
276+ }
277+
278+ // Convert to Vec<Slot> so we can avoid copying the statistics
279+ let mut columns: Vec < _ > = std:: mem:: take ( & mut self . column_statistics )
280+ . into_iter ( )
281+ . map ( Slot :: Present )
276282 . collect ( ) ;
277283
284+ for idx in projection {
285+ let next_idx = self . column_statistics . len ( ) ;
286+ let slot = std:: mem:: replace (
287+ columns. get_mut ( * idx) . expect ( "projection out of bounds" ) ,
288+ Slot :: Taken ( next_idx) ,
289+ ) ;
290+ match slot {
291+ // The column was there, so just move it
292+ Slot :: Present ( col) => self . column_statistics . push ( col) ,
293+ // The column was taken, so copy from the previous location
294+ Slot :: Taken ( prev_idx) => self
295+ . column_statistics
296+ . push ( self . column_statistics [ prev_idx] . clone ( ) ) ,
297+ }
298+ }
299+
278300 self
279301 }
280302
@@ -581,4 +603,50 @@ mod tests {
581603 let p2 = precision. clone ( ) ;
582604 assert_eq ! ( precision, p2) ;
583605 }
606+
/// Projecting with `None` must hand the statistics back unchanged.
#[test]
fn test_project_none() {
    let no_projection: Option<Vec<usize>> = None;

    let actual = make_stats(vec![10, 20, 30]).project(no_projection.as_ref());
    let expected = make_stats(vec![10, 20, 30]);

    assert_eq!(actual, expected);
}
613+
/// Projecting with an empty index list must drop every column statistic.
#[test]
fn test_project_empty() {
    let keep_nothing: Option<Vec<usize>> = Some(Vec::new());

    let actual = make_stats(vec![10, 20, 30]).project(keep_nothing.as_ref());
    let expected = make_stats(vec![]);

    assert_eq!(actual, expected);
}
620+
/// Projecting `[2, 1]` must yield the column at index 2 followed by the
/// column at index 1, with no column requested more than once.
#[test]
fn test_project_swap() {
    let indices = Some(vec![2, 1]);

    let actual = make_stats(vec![10, 20, 30]).project(indices.as_ref());
    let expected = make_stats(vec![30, 20]);

    assert_eq!(actual, expected);
}
627+
/// Projecting with repeated indices must emit one entry per requested index,
/// in request order, even when the same source column appears several times.
#[test]
fn test_project_repeated() {
    let indices = Some(vec![1, 2, 1, 1, 0, 2]);

    let actual = make_stats(vec![10, 20, 30]).project(indices.as_ref());
    let expected = make_stats(vec![20, 30, 20, 20, 10, 30]);

    assert_eq!(actual, expected);
}
634+
635+ // Make a Statistics structure with the specified null counts for each column
636+ fn make_stats ( counts : impl IntoIterator < Item = usize > ) -> Statistics {
637+ Statistics {
638+ num_rows : Precision :: Exact ( 42 ) ,
639+ total_byte_size : Precision :: Exact ( 500 ) ,
640+ column_statistics : counts. into_iter ( ) . map ( col_stats_i64) . collect ( ) ,
641+ }
642+ }
643+
644+ fn col_stats_i64 ( null_count : usize ) -> ColumnStatistics {
645+ ColumnStatistics {
646+ null_count : Precision :: Exact ( null_count) ,
647+ max_value : Precision :: Exact ( ScalarValue :: Int64 ( Some ( 42 ) ) ) ,
648+ min_value : Precision :: Exact ( ScalarValue :: Int64 ( Some ( 64 ) ) ) ,
649+ distinct_count : Precision :: Exact ( 100 ) ,
650+ }
651+ }
584652}
0 commit comments