@@ -41,6 +41,7 @@ struct BoolAccumulator {
4141 minima : UnwrappedStatAccumulator < bool > ,
4242 true_count : UnwrappedStatAccumulator < u64 > ,
4343 null_count : UnwrappedStatAccumulator < u64 > ,
44+ uncompressed_size : UnwrappedStatAccumulator < u64 > ,
4445}
4546
4647impl BoolAccumulator {
@@ -50,6 +51,10 @@ impl BoolAccumulator {
5051 minima : UnwrappedStatAccumulator :: new ( Stat :: Min , "min" . into ( ) ) ,
5152 true_count : UnwrappedStatAccumulator :: new ( Stat :: TrueCount , "true_count" . into ( ) ) ,
5253 null_count : UnwrappedStatAccumulator :: new ( Stat :: NullCount , "null_count" . into ( ) ) ,
54+ uncompressed_size : UnwrappedStatAccumulator :: new (
55+ Stat :: UncompressedSizeInBytes ,
56+ "uncompressed_size" . into ( ) ,
57+ ) ,
5358 }
5459 }
5560}
@@ -60,6 +65,7 @@ impl MetadataAccumulator for BoolAccumulator {
6065 self . minima . push_chunk ( array) ;
6166 self . true_count . push_chunk ( array) ;
6267 self . null_count . push_chunk ( array) ;
68+ self . uncompressed_size . push_chunk ( array) ;
6369 }
6470
6571 fn into_array ( self : Box < Self > ) -> VortexResult < Option < ArrayData > > {
@@ -68,6 +74,7 @@ impl MetadataAccumulator for BoolAccumulator {
6874 self . minima . into_column ( ) ,
6975 self . true_count . into_column ( ) ,
7076 self . null_count . into_column ( ) ,
77+ self . uncompressed_size . into_column ( ) ,
7178 ]
7279 . into_iter ( )
7380 . flatten ( )
@@ -90,6 +97,7 @@ struct StandardAccumulator<T> {
9097 maxima : UnwrappedStatAccumulator < T > ,
9198 minima : UnwrappedStatAccumulator < T > ,
9299 null_count : UnwrappedStatAccumulator < u64 > ,
100+ uncompressed_size : UnwrappedStatAccumulator < u64 > ,
93101}
94102
95103impl < T > StandardAccumulator < T > {
@@ -98,6 +106,10 @@ impl<T> StandardAccumulator<T> {
98106 maxima : UnwrappedStatAccumulator :: new ( Stat :: Max , "max" . into ( ) ) ,
99107 minima : UnwrappedStatAccumulator :: new ( Stat :: Min , "min" . into ( ) ) ,
100108 null_count : UnwrappedStatAccumulator :: new ( Stat :: NullCount , "null_count" . into ( ) ) ,
109+ uncompressed_size : UnwrappedStatAccumulator :: new (
110+ Stat :: UncompressedSizeInBytes ,
111+ "uncompressed_size" . into ( ) ,
112+ ) ,
101113 }
102114 }
103115}
@@ -111,13 +123,15 @@ where
111123 self . maxima . push_chunk ( array) ;
112124 self . minima . push_chunk ( array) ;
113125 self . null_count . push_chunk ( array) ;
126+ self . uncompressed_size . push_chunk ( array) ;
114127 }
115128
116129 fn into_array ( self : Box < Self > ) -> VortexResult < Option < ArrayData > > {
117130 let ( names, fields) : ( Vec < FieldName > , Vec < ArrayData > ) = [
118131 self . maxima . into_column ( ) ,
119132 self . minima . into_column ( ) ,
120133 self . null_count . into_column ( ) ,
134+ self . uncompressed_size . into_column ( ) ,
121135 ]
122136 . into_iter ( )
123137 . flatten ( )
@@ -134,29 +148,38 @@ where
134148 }
135149}
136150
137- /// A minimal accumulator which only tracks null counts.
151+ /// A minimal accumulator which only tracks null counts and total uncompressed size .
138152struct BasicAccumulator {
139153 null_count : UnwrappedStatAccumulator < u64 > ,
154+ uncompressed_size : UnwrappedStatAccumulator < u64 > ,
140155}
141156
142157impl BasicAccumulator {
143158 fn new ( ) -> Self {
144159 Self {
145160 null_count : UnwrappedStatAccumulator :: new ( Stat :: NullCount , "null_count" . into ( ) ) ,
161+ uncompressed_size : UnwrappedStatAccumulator :: new (
162+ Stat :: UncompressedSizeInBytes ,
163+ "uncompressed_size" . into ( ) ,
164+ ) ,
146165 }
147166 }
148167}
149168
150169impl MetadataAccumulator for BasicAccumulator {
151170 fn push_chunk ( & mut self , array : & ArrayData ) {
152- self . null_count . push_chunk ( array)
171+ self . null_count . push_chunk ( array) ;
172+ self . uncompressed_size . push_chunk ( array) ;
153173 }
154174
155175 fn into_array ( self : Box < Self > ) -> VortexResult < Option < ArrayData > > {
156- let ( names, fields) : ( Vec < FieldName > , Vec < ArrayData > ) = [ self . null_count . into_column ( ) ]
157- . into_iter ( )
158- . flatten ( )
159- . unzip ( ) ;
176+ let ( names, fields) : ( Vec < FieldName > , Vec < ArrayData > ) = [
177+ self . null_count . into_column ( ) ,
178+ self . uncompressed_size . into_column ( ) ,
179+ ]
180+ . into_iter ( )
181+ . flatten ( )
182+ . unzip ( ) ;
160183 if fields. is_empty ( ) {
161184 Ok ( None )
162185 } else {
@@ -246,7 +269,16 @@ mod tests {
246269 StructArray :: try_from ( Box :: new ( bool_accumulator) . into_array ( ) . unwrap ( ) . unwrap ( ) )
247270 . unwrap ( ) ;
248271 assert_eq ! ( struct_array. len( ) , 1 ) ;
249- assert_field_names ( & struct_array, & [ "max" , "min" , "true_count" , "null_count" ] ) ;
272+ assert_field_names (
273+ & struct_array,
274+ & [
275+ "max" ,
276+ "min" ,
277+ "true_count" ,
278+ "null_count" ,
279+ "uncompressed_size" ,
280+ ] ,
281+ ) ;
250282 }
251283
252284 #[ test]
@@ -263,7 +295,10 @@ mod tests {
263295 )
264296 . unwrap ( ) ;
265297 assert_eq ! ( struct_array. len( ) , 1 ) ;
266- assert_field_names ( & struct_array, & [ "max" , "min" , "null_count" ] ) ;
298+ assert_field_names (
299+ & struct_array,
300+ & [ "max" , "min" , "null_count" , "uncompressed_size" ] ,
301+ ) ;
267302 }
268303
269304 #[ test]
@@ -328,6 +363,6 @@ mod tests {
328363 StructArray :: try_from ( Box :: new ( basic_accumulator) . into_array ( ) . unwrap ( ) . unwrap ( ) )
329364 . unwrap ( ) ;
330365 assert_eq ! ( struct_array. len( ) , 1 ) ;
331- assert_field_names ( & struct_array, & [ "null_count" ] ) ;
366+ assert_field_names ( & struct_array, & [ "null_count" , "uncompressed_size" ] ) ;
332367 }
333368}
0 commit comments