Skip to content

Commit 8391396

Browse files
committed
statistics should use default
1 parent 9398949 commit 8391396

File tree

2 files changed

+40
-38
lines changed

2 files changed

+40
-38
lines changed

src/database.rs

Lines changed: 38 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1613,23 +1613,23 @@ impl ProjectRoutingTable {
16131613
ProjectIdPushdown::has_project_id_filter(filters)
16141614
}
16151615

1616-
/// Get actual statistics from Delta Lake metadata
1617-
async fn get_delta_statistics(&self) -> Result<Statistics> {
1618-
// Get the Delta table for the default project or first available
1619-
let project_id = self.extract_project_id_from_filters(&[]).unwrap_or_else(|| self.default_project.clone());
1620-
1621-
// Try to get the table
1622-
match self.database.resolve_table(&project_id, &self.table_name).await {
1623-
Ok(table_ref) => {
1624-
let table = table_ref.read().await;
1625-
self.database.statistics_extractor.extract_statistics(&table, &project_id, &self.table_name, &self.schema).await
1626-
}
1627-
Err(e) => {
1628-
debug!("Failed to resolve table for statistics: {}", e);
1629-
Err(anyhow::anyhow!("Failed to get table for statistics"))
1630-
}
1631-
}
1632-
}
1616+
///// Get actual statistics from Delta Lake metadata
1617+
//async fn get_delta_statistics(&self) -> Result<Statistics> {
1618+
// // Get the Delta table for the default project or first available
1619+
// let project_id = self.extract_project_id_from_filters(&[]).unwrap_or_else(|| self.default_project.clone());
1620+
//
1621+
// // Try to get the table
1622+
// match self.database.resolve_table(&project_id, &self.table_name).await {
1623+
// Ok(table_ref) => {
1624+
// let table = table_ref.read().await;
1625+
// self.database.statistics_extractor.extract_statistics(&table, &project_id, &self.table_name, &self.schema).await
1626+
// }
1627+
// Err(e) => {
1628+
// debug!("Failed to resolve table for statistics: {}", e);
1629+
// Err(anyhow::anyhow!("Failed to get table for statistics"))
1630+
// }
1631+
// }
1632+
//}
16331633
}
16341634

16351635
// Needed by DataSink
@@ -1758,26 +1758,27 @@ impl TableProvider for ProjectRoutingTable {
17581758
Ok(plan)
17591759
}
17601760
fn statistics(&self) -> Option<Statistics> {
1761-
// Use tokio's block_in_place to run async code in sync context
1762-
// This is safe here as statistics are cached and the operation is fast
1763-
tokio::task::block_in_place(|| {
1764-
let runtime = tokio::runtime::Handle::current();
1765-
runtime.block_on(async {
1766-
// Try to get statistics from Delta Lake
1767-
match self.get_delta_statistics().await {
1768-
Ok(stats) => Some(stats),
1769-
Err(e) => {
1770-
debug!("Failed to get Delta Lake statistics: {}", e);
1771-
// Fall back to conservative estimates
1772-
Some(Statistics {
1773-
num_rows: Precision::Inexact(1_000_000),
1774-
total_byte_size: Precision::Inexact(100_000_000),
1775-
column_statistics: vec![],
1776-
})
1777-
}
1778-
}
1779-
})
1780-
})
1761+
None
1762+
// // Use tokio's block_in_place to run async code in sync context
1763+
// // This is safe here as statistics are cached and the operation is fast
1764+
// tokio::task::block_in_place(|| {
1765+
// let runtime = tokio::runtime::Handle::current();
1766+
// runtime.block_on(async {
1767+
// // Try to get statistics from Delta Lake
1768+
// match self.get_delta_statistics().await {
1769+
// Ok(stats) => Some(stats),
1770+
// Err(e) => {
1771+
// debug!("Failed to get Delta Lake statistics: {}", e);
1772+
// // Fall back to conservative estimates
1773+
// Some(Statistics {
1774+
// num_rows: Precision::Inexact(1_000_000),
1775+
// total_byte_size: Precision::Inexact(100_000_000),
1776+
// column_statistics: vec![],
1777+
// })
1778+
// }
1779+
// }
1780+
// })
1781+
// })
17811782
}
17821783
}
17831784

src/statistics.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use anyhow::Result;
22
use datafusion::arrow::datatypes::SchemaRef;
3-
use datafusion::common::Statistics;
43
use datafusion::common::stats::Precision;
4+
use datafusion::common::Statistics;
55
use deltalake::DeltaTable;
66
use lru::LruCache;
77
use std::num::NonZeroUsize;
@@ -17,6 +17,7 @@ pub struct CachedStatistics {
1717
pub version: i64,
1818
}
1919

20+
// TODO: delete this file in favor of using:
2021
/// Simplified statistics extractor for Delta Lake tables
2122
/// Only extracts basic row count and byte size statistics
2223
#[derive(Debug)]

0 commit comments

Comments
 (0)