@@ -3,6 +3,7 @@ use std::{io, mem};
33use flatbuffers:: FlatBufferBuilder ;
44use futures:: TryStreamExt ;
55use vortex_array:: array:: { ChunkedArray , StructArray } ;
6+ use vortex_array:: stats:: { ArrayStatistics , Stat } ;
67use vortex_array:: stream:: ArrayStream ;
78use vortex_array:: { ArrayDType as _, ArrayData } ;
89use vortex_buffer:: io_buf:: IoBuf ;
@@ -19,6 +20,18 @@ use crate::write::metadata_accumulators::{new_metadata_accumulator, MetadataAccu
1920use crate :: write:: postscript:: Postscript ;
2021use crate :: { EOF_SIZE , MAGIC_BYTES , MAX_FOOTER_SIZE , VERSION } ;
2122
/// Statistics that are kept on a chunk when it is written out.
///
/// The `ColumnWriter` chunk loop passes this list to `retain_only` on each chunk's
/// statistics right before `write_batch`; per the comment at that call site, stats that
/// are not listed here are cleared so they are not serialized into the file.
///
/// The `stats_to_write` unit test asserts that every entry of `PRUNING_STATS` is
/// present in this list, so extend it (never shrink it) when pruning stats change.
23+ const STATS_TO_WRITE : & [ Stat ] = & [
24+ Stat :: Min ,
25+ Stat :: Max ,
26+ Stat :: TrueCount ,
27+ Stat :: NullCount ,
28+ Stat :: RunCount ,
29+ Stat :: IsConstant ,
30+ Stat :: IsSorted ,
31+ Stat :: IsStrictSorted ,
32+ Stat :: UncompressedSizeInBytes ,
33+ ] ;
34+
2235pub struct VortexFileWriter < W > {
2336 msgs : MessageWriter < W > ,
2437
@@ -217,7 +230,13 @@ impl ColumnWriter {
217230
218231 while let Some ( chunk) = stream. try_next ( ) . await ? {
219232 rows_written += chunk. len ( ) as u64 ;
233+
234+ // accumulate the stats for the stats table
220235 self . metadata . push_chunk ( & chunk) ;
236+
237+ // clear the stats that we don't want to serialize into the file
238+ chunk. statistics ( ) . retain_only ( STATS_TO_WRITE ) ;
239+
221240 msgs. write_batch ( chunk) . await ?;
222241 offsets. push ( msgs. tell ( ) ) ;
223242 row_offsets. push ( rows_written) ;
@@ -292,11 +311,13 @@ mod tests {
292311 use flatbuffers:: FlatBufferBuilder ;
293312 use futures_executor:: block_on;
294313 use vortex_array:: array:: { PrimitiveArray , StructArray , VarBinArray } ;
314+ use vortex_array:: stats:: PRUNING_STATS ;
295315 use vortex_array:: validity:: Validity ;
296316 use vortex_array:: IntoArrayData ;
297317 use vortex_flatbuffers:: WriteFlatBuffer ;
298318
299319 use crate :: write:: postscript:: Postscript ;
320+ use crate :: write:: writer:: STATS_TO_WRITE ;
300321 use crate :: { VortexFileWriter , V1_FOOTER_FBS_SIZE } ;
301322
302323 #[ test]
@@ -328,4 +349,11 @@ mod tests {
328349
329350 assert_eq ! ( buffer[ buffer_begin..buffer_end] . len( ) , V1_FOOTER_FBS_SIZE ) ;
330351 }
352+
#[test]
fn stats_to_write() {
    // Each stat listed in PRUNING_STATS must also be listed in STATS_TO_WRITE;
    // the first one that is missing fails the test.
    PRUNING_STATS
        .iter()
        .for_each(|stat| assert!(STATS_TO_WRITE.contains(stat)));
}
331359}
0 commit comments