@@ -4,6 +4,7 @@ use std::sync::Arc;
44use log:: warn;
55use vortex_array:: aliases:: hash_set:: HashSet ;
66use vortex_array:: array:: { Chunked , ChunkedArray } ;
7+ use vortex_array:: compress:: compute_pruning_stats;
78use vortex_array:: encoding:: EncodingRef ;
89use vortex_array:: stats:: ArrayStatistics as _;
910use vortex_array:: { Array , ArrayDType , ArrayDef , IntoArray } ;
@@ -116,6 +117,12 @@ impl ChunkedCompressor {
116117 ) ?;
117118 let mut compressed_chunks = Vec :: with_capacity ( less_chunked. nchunks ( ) ) ;
118119 for ( index, chunk) in less_chunked. chunks ( ) . enumerate ( ) {
120+ // Pruning stats are extremely valuable when reading/writing, but are potentially much more expensive
121+ // to compute post-compression. That's because not all encodings implement stats, so we would
122+ // potentially have to canonicalize during writes just to get stats, which would be silly.
123+ // Also, we only really require them for column chunks, not for every array.
124+ compute_pruning_stats ( & chunk) ?;
125+
119126 let like = previous. as_ref ( ) . map ( |( like, _) | like) ;
120127 let ( compressed_chunk, tree) = ctx
121128 . named ( & format ! ( "chunk-{}" , index) )
0 commit comments