@@ -187,7 +187,7 @@ pub struct FileStatistics {
187
187
/// Statistics on a column level.
188
188
/// Each entry in the vector corresponds to a column in the source schema.
189
189
/// None entries are possible if statistics are not available for a column.
190
- pub column_statistics : Vec < Option < ColumnDistributionStatistics > > ,
190
+ pub column_statistics : Vec < Option < Arc < ColumnDistributionStatistics > > > ,
191
191
}
192
192
193
193
impl Default for FileStatistics {
@@ -212,7 +212,7 @@ impl FileStatistics {
212
212
column_statistics : schema
213
213
. fields ( )
214
214
. iter ( )
215
- . map ( |_| Some ( ColumnDistributionStatistics :: new_unknown ( ) ) )
215
+ . map ( |_| Some ( Arc :: new ( ColumnDistributionStatistics :: new_unknown ( ) ) ) )
216
216
. collect ( ) ,
217
217
}
218
218
}
@@ -232,7 +232,7 @@ impl FileStatistics {
232
232
/// Add a column to the column statistics
233
233
pub fn add_column_statistics (
234
234
mut self ,
235
- column_stats : Option < ColumnDistributionStatistics > ,
235
+ column_stats : Option < Arc < ColumnDistributionStatistics > > ,
236
236
) -> Self {
237
237
self . column_statistics . push ( column_stats) ;
238
238
self
@@ -246,7 +246,7 @@ impl FileStatistics {
246
246
self . column_statistics = self
247
247
. column_statistics
248
248
. into_iter ( )
249
- . map ( |s| s. map ( |stats| stats . to_inexact ( ) ) )
249
+ . map ( |s| s. map ( |arc_stats| Arc :: new ( arc_stats . as_ref ( ) . clone ( ) . to_inexact ( ) ) ) )
250
250
. collect ( ) ;
251
251
self
252
252
}
@@ -266,7 +266,7 @@ impl FileStatistics {
266
266
/// The column is taken and put into the specified statistics location
267
267
Taken ( usize ) ,
268
268
/// The original column is present
269
- Present ( Option < ColumnDistributionStatistics > ) ,
269
+ Present ( Option < Arc < ColumnDistributionStatistics > > ) ,
270
270
}
271
271
272
272
// Convert to Vec<Slot> so we can avoid copying the statistics
@@ -358,9 +358,9 @@ impl FileStatistics {
358
358
/// Merge two optional column distribution statistics.
359
359
/// Returns None if either input is None.
360
360
fn merge_column_distribution_stats (
361
- left : & Option < ColumnDistributionStatistics > ,
362
- right : & Option < ColumnDistributionStatistics > ,
363
- ) -> Result < Option < ColumnDistributionStatistics > > {
361
+ left : & Option < Arc < ColumnDistributionStatistics > > ,
362
+ right : & Option < Arc < ColumnDistributionStatistics > > ,
363
+ ) -> Result < Option < Arc < ColumnDistributionStatistics > > > {
364
364
match ( left, right) {
365
365
( Some ( l) , Some ( r) ) => {
366
366
let null_count = l. null_count . add ( & r. null_count ) ;
@@ -371,13 +371,13 @@ impl FileStatistics {
371
371
let distinct_count = Precision :: Absent ;
372
372
let row_size = Self :: merge_distributions ( & l. row_size , & r. row_size ) ?;
373
373
374
- Ok ( Some ( ColumnDistributionStatistics {
374
+ Ok ( Some ( Arc :: new ( ColumnDistributionStatistics {
375
375
null_count,
376
376
distribution,
377
377
sum_value,
378
378
distinct_count,
379
379
row_size,
380
- } ) )
380
+ } ) ) )
381
381
}
382
382
_ => Ok ( None ) ,
383
383
}
@@ -1098,10 +1098,10 @@ mod tests {
1098
1098
let stats = FileStatistics :: default ( )
1099
1099
. with_num_rows ( Precision :: Exact ( 100 ) )
1100
1100
. with_total_byte_size ( Precision :: Exact ( 1024 ) )
1101
- . add_column_statistics ( Some (
1101
+ . add_column_statistics ( Some ( Arc :: new (
1102
1102
ColumnDistributionStatistics :: new_unknown ( )
1103
1103
. with_null_count ( Precision :: Exact ( 5 ) ) ,
1104
- ) ) ;
1104
+ ) ) ) ;
1105
1105
1106
1106
let inexact = stats. to_inexact ( ) ;
1107
1107
@@ -1119,18 +1119,18 @@ mod tests {
1119
1119
use datafusion_common:: stats:: Precision ;
1120
1120
1121
1121
let stats = FileStatistics :: default ( )
1122
- . add_column_statistics ( Some (
1122
+ . add_column_statistics ( Some ( Arc :: new (
1123
1123
ColumnDistributionStatistics :: new_unknown ( )
1124
1124
. with_null_count ( Precision :: Exact ( 1 ) ) ,
1125
- ) )
1126
- . add_column_statistics ( Some (
1125
+ ) ) )
1126
+ . add_column_statistics ( Some ( Arc :: new (
1127
1127
ColumnDistributionStatistics :: new_unknown ( )
1128
1128
. with_null_count ( Precision :: Exact ( 2 ) ) ,
1129
- ) )
1130
- . add_column_statistics ( Some (
1129
+ ) ) )
1130
+ . add_column_statistics ( Some ( Arc :: new (
1131
1131
ColumnDistributionStatistics :: new_unknown ( )
1132
1132
. with_null_count ( Precision :: Exact ( 3 ) ) ,
1133
- ) ) ;
1133
+ ) ) ) ;
1134
1134
1135
1135
// Project to columns [2, 0]
1136
1136
let projection = vec ! [ 2 , 0 ] ;
@@ -1176,22 +1176,22 @@ mod tests {
1176
1176
let stats1 = FileStatistics :: default ( )
1177
1177
. with_num_rows ( Precision :: Exact ( 100 ) )
1178
1178
. with_total_byte_size ( Precision :: Exact ( 1000 ) )
1179
- . add_column_statistics ( Some (
1179
+ . add_column_statistics ( Some ( Arc :: new (
1180
1180
ColumnDistributionStatistics :: new_unknown ( )
1181
1181
. with_null_count ( Precision :: Exact ( 5 ) )
1182
1182
. with_distribution ( Precision :: Exact ( dist1) )
1183
1183
. with_sum_value ( Precision :: Exact ( ScalarValue :: Int32 ( Some ( 500 ) ) ) ) ,
1184
- ) ) ;
1184
+ ) ) ) ;
1185
1185
1186
1186
let stats2 = FileStatistics :: default ( )
1187
1187
. with_num_rows ( Precision :: Exact ( 200 ) )
1188
1188
. with_total_byte_size ( Precision :: Inexact ( 2000 ) )
1189
- . add_column_statistics ( Some (
1189
+ . add_column_statistics ( Some ( Arc :: new (
1190
1190
ColumnDistributionStatistics :: new_unknown ( )
1191
1191
. with_null_count ( Precision :: Exact ( 10 ) )
1192
1192
. with_distribution ( Precision :: Exact ( dist2) )
1193
1193
. with_sum_value ( Precision :: Exact ( ScalarValue :: Int32 ( Some ( 1000 ) ) ) ) ,
1194
- ) ) ;
1194
+ ) ) ) ;
1195
1195
1196
1196
let merged = stats1. try_merge ( & stats2) . unwrap ( ) ;
1197
1197
@@ -1224,10 +1224,10 @@ mod tests {
1224
1224
let stats = FileStatistics :: default ( )
1225
1225
. with_num_rows ( Precision :: Exact ( 100 ) )
1226
1226
. with_total_byte_size ( Precision :: Inexact ( 1024 ) )
1227
- . add_column_statistics ( Some (
1227
+ . add_column_statistics ( Some ( Arc :: new (
1228
1228
ColumnDistributionStatistics :: new_unknown ( )
1229
1229
. with_null_count ( Precision :: Exact ( 5 ) ) ,
1230
- ) ) ;
1230
+ ) ) ) ;
1231
1231
1232
1232
let display_str = format ! ( "{}" , stats) ;
1233
1233
assert ! ( display_str. contains( "Rows=Exact(100)" ) ) ;
0 commit comments