|
18 | 18 |
|
19 | 19 | use std::{any::Any, collections::HashMap, ops::Bound, sync::Arc};
|
20 | 20 |
|
21 |
| -use arrow_schema::{DataType, Schema, SchemaRef, SortOptions}; |
| 21 | +use arrow_schema::{Schema, SchemaRef, SortOptions}; |
22 | 22 | use bytes::Bytes;
|
23 | 23 | use chrono::{NaiveDateTime, Timelike, Utc};
|
24 | 24 | use datafusion::{
|
@@ -236,57 +236,23 @@ fn partitioned_files(
|
236 | 236 | count += num_rows;
|
237 | 237 | }
|
238 | 238 |
|
239 |
| - let mut statistics = vec![]; |
240 |
| - |
241 |
| - for field in table_schema.fields() { |
242 |
| - let Some(stats) = column_statistics |
243 |
| - .get(field.name()) |
244 |
| - .and_then(|stats| stats.as_ref()) |
245 |
| - else { |
246 |
| - statistics.push(datafusion::common::ColumnStatistics::default()); |
247 |
| - break; |
248 |
| - }; |
249 |
| - |
250 |
| - let datatype = field.data_type(); |
251 |
| - |
252 |
| - let (min, max) = match (stats, datatype) { |
253 |
| - (TypedStatistics::Bool(stats), DataType::Boolean) => ( |
254 |
| - ScalarValue::Boolean(Some(stats.min)), |
255 |
| - ScalarValue::Boolean(Some(stats.max)), |
256 |
| - ), |
257 |
| - (TypedStatistics::Int(stats), DataType::Int32) => ( |
258 |
| - ScalarValue::Int32(Some(stats.min as i32)), |
259 |
| - ScalarValue::Int32(Some(stats.max as i32)), |
260 |
| - ), |
261 |
| - (TypedStatistics::Int(stats), DataType::Int64) => ( |
262 |
| - ScalarValue::Int64(Some(stats.min)), |
263 |
| - ScalarValue::Int64(Some(stats.max)), |
264 |
| - ), |
265 |
| - (TypedStatistics::Float(stats), DataType::Float32) => ( |
266 |
| - ScalarValue::Float32(Some(stats.min as f32)), |
267 |
| - ScalarValue::Float32(Some(stats.max as f32)), |
268 |
| - ), |
269 |
| - (TypedStatistics::Float(stats), DataType::Float64) => ( |
270 |
| - ScalarValue::Float64(Some(stats.min)), |
271 |
| - ScalarValue::Float64(Some(stats.max)), |
272 |
| - ), |
273 |
| - (TypedStatistics::String(stats), DataType::Utf8) => ( |
274 |
| - ScalarValue::Utf8(Some(stats.min.clone())), |
275 |
| - ScalarValue::Utf8(Some(stats.max.clone())), |
276 |
| - ), |
277 |
| - _ => { |
278 |
| - statistics.push(datafusion::common::ColumnStatistics::default()); |
279 |
| - break; |
280 |
| - } |
281 |
| - }; |
282 |
| - |
283 |
| - statistics.push(datafusion::common::ColumnStatistics { |
284 |
| - null_count: None, |
285 |
| - max_value: Some(max), |
286 |
| - min_value: Some(min), |
287 |
| - distinct_count: None, |
| 239 | + let statistics = table_schema |
| 240 | + .fields() |
| 241 | + .iter() |
| 242 | + .map(|field| { |
| 243 | + column_statistics |
| 244 | + .get(field.name()) |
| 245 | + .and_then(|stats| stats.as_ref()) |
| 246 | + .and_then(|stats| stats.clone().min_max_as_scalar(field.data_type())) |
| 247 | + .map(|(min, max)| datafusion::common::ColumnStatistics { |
| 248 | + null_count: None, |
| 249 | + max_value: Some(max), |
| 250 | + min_value: Some(min), |
| 251 | + distinct_count: None, |
| 252 | + }) |
| 253 | + .unwrap_or_default() |
288 | 254 | })
|
289 |
| - } |
| 255 | + .collect(); |
290 | 256 |
|
291 | 257 | let statistics = datafusion::common::Statistics {
|
292 | 258 | num_rows: Some(count as usize),
|
|
0 commit comments