Skip to content

Commit 4e2edee

Browse files
Fix empty table query returning error instead of empty result (#22)
1 parent 5f04ef6 commit 4e2edee

File tree

2 files changed

+169
-7
lines changed

2 files changed

+169
-7
lines changed

src/table.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -320,24 +320,28 @@ impl TableProvider for DuckLakeTable {
320320
execs.push(exec);
321321
}
322322

323+
// Handle empty tables (no data files)
324+
if execs.is_empty() {
325+
use datafusion::physical_plan::empty::EmptyExec;
326+
let projected_schema = match projection {
327+
Some(indices) => Arc::new(self.schema.project(indices)?),
328+
None => self.schema.clone(),
329+
};
330+
return Ok(Arc::new(EmptyExec::new(projected_schema)));
331+
}
332+
323333
// Combine execution plans
324334
combine_execution_plans(execs)
325335
}
326336
}
327337

328338
/// Combines multiple execution plans into a single plan
329-
///
330-
/// Returns an error if no plans are provided, a single plan if only one exists,
331-
/// or a UnionExec if multiple plans need to be combined.
332339
fn combine_execution_plans(
333340
execs: Vec<Arc<dyn ExecutionPlan>>,
334341
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
335-
if execs.is_empty() {
336-
Err(DataFusionError::Internal("No data files found".into()))
337-
} else if execs.len() == 1 {
342+
if execs.len() == 1 {
338343
Ok(execs.into_iter().next().unwrap())
339344
} else {
340-
// Use UnionExec to combine multiple file scans
341345
use datafusion::physical_plan::union::UnionExec;
342346
Ok(Arc::new(UnionExec::new(execs)))
343347
}

tests/table_tests.rs

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
//! Table provider tests
2+
//!
3+
//! Tests for DuckLakeTable functionality.
4+
5+
use std::sync::Arc;
6+
7+
use arrow::array::Int64Array;
8+
use datafusion::error::Result as DataFusionResult;
9+
use datafusion::prelude::*;
10+
use datafusion_ducklake::{DuckLakeCatalog, DuckdbMetadataProvider};
11+
use tempfile::TempDir;
12+
13+
/// Creates a catalog with an empty table (no data files)
14+
fn create_empty_table_catalog(catalog_path: &std::path::Path) -> anyhow::Result<()> {
15+
let conn = duckdb::Connection::open_in_memory()?;
16+
17+
conn.execute("INSTALL ducklake;", [])?;
18+
conn.execute("LOAD ducklake;", [])?;
19+
20+
// Create data directory (DuckLake only creates it on first INSERT)
21+
let data_dir = catalog_path.with_extension("ducklake.files");
22+
std::fs::create_dir_all(&data_dir)?;
23+
24+
let ducklake_path = format!("ducklake:{}", catalog_path.display());
25+
conn.execute(&format!("ATTACH '{}' AS test_catalog;", ducklake_path), [])?;
26+
27+
// Multiple columns for projection tests
28+
conn.execute(
29+
"CREATE TABLE test_catalog.tbl (a INTEGER, b VARCHAR, c DOUBLE);",
30+
[],
31+
)?;
32+
33+
// No INSERT - table has no data files
34+
35+
Ok(())
36+
}
37+
38+
fn create_catalog(path: &str) -> DataFusionResult<Arc<DuckLakeCatalog>> {
39+
let provider = DuckdbMetadataProvider::new(path)
40+
.map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
41+
let catalog = DuckLakeCatalog::new(provider)
42+
.map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
43+
Ok(Arc::new(catalog))
44+
}
45+
46+
/// Helper to setup test context
47+
async fn setup_empty_table_context(name: &str) -> DataFusionResult<SessionContext> {
48+
let temp_dir =
49+
TempDir::new().map_err(|e| datafusion::error::DataFusionError::External(Box::new(e)))?;
50+
let catalog_path = temp_dir.path().join(format!("{}.ducklake", name));
51+
52+
create_empty_table_catalog(&catalog_path)
53+
.map_err(|e| datafusion::error::DataFusionError::External(e.into()))?;
54+
55+
let catalog = create_catalog(&catalog_path.to_string_lossy())?;
56+
let ctx = SessionContext::new();
57+
ctx.register_catalog("ducklake", catalog);
58+
59+
// Keep temp_dir alive by leaking it (test cleanup handles it)
60+
std::mem::forget(temp_dir);
61+
62+
Ok(ctx)
63+
}
64+
65+
/// Test basic empty table scan
66+
#[tokio::test]
67+
async fn test_empty_table_basic_scan() -> DataFusionResult<()> {
68+
let ctx = setup_empty_table_context("basic").await?;
69+
70+
let df = ctx.sql("SELECT * FROM ducklake.main.tbl").await?;
71+
let batches = df.collect().await?;
72+
73+
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
74+
assert_eq!(total_rows, 0);
75+
76+
Ok(())
77+
}
78+
79+
/// Test empty table with projection
80+
#[tokio::test]
81+
async fn test_empty_table_projection() -> DataFusionResult<()> {
82+
let ctx = setup_empty_table_context("proj").await?;
83+
84+
let df = ctx.sql("SELECT a FROM ducklake.main.tbl").await?;
85+
let schema = df.schema().clone();
86+
let batches = df.collect().await?;
87+
88+
// Verify schema has only projected column
89+
assert_eq!(schema.fields().len(), 1);
90+
assert_eq!(schema.field(0).name(), "a");
91+
92+
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
93+
assert_eq!(total_rows, 0);
94+
95+
Ok(())
96+
}
97+
98+
/// Test empty table with reordered projection
99+
#[tokio::test]
100+
async fn test_empty_table_reordered_projection() -> DataFusionResult<()> {
101+
let ctx = setup_empty_table_context("reorder").await?;
102+
103+
let df = ctx.sql("SELECT c, a FROM ducklake.main.tbl").await?;
104+
let schema = df.schema().clone();
105+
let batches = df.collect().await?;
106+
107+
// Verify schema has columns in correct order
108+
assert_eq!(schema.fields().len(), 2);
109+
assert_eq!(schema.field(0).name(), "c");
110+
assert_eq!(schema.field(1).name(), "a");
111+
112+
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
113+
assert_eq!(total_rows, 0);
114+
115+
Ok(())
116+
}
117+
118+
/// Test empty table with filter
119+
#[tokio::test]
120+
async fn test_empty_table_with_filter() -> DataFusionResult<()> {
121+
let ctx = setup_empty_table_context("filter").await?;
122+
123+
let df = ctx
124+
.sql("SELECT * FROM ducklake.main.tbl WHERE a > 10")
125+
.await?;
126+
let batches = df.collect().await?;
127+
128+
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
129+
assert_eq!(total_rows, 0);
130+
131+
Ok(())
132+
}
133+
134+
/// Test empty table with aggregate (COUNT)
135+
#[tokio::test]
136+
async fn test_empty_table_aggregate() -> DataFusionResult<()> {
137+
let ctx = setup_empty_table_context("agg").await?;
138+
139+
let df = ctx
140+
.sql("SELECT COUNT(*) as cnt FROM ducklake.main.tbl")
141+
.await?;
142+
let batches = df.collect().await?;
143+
144+
// COUNT on empty table should return 1 row with value 0
145+
assert_eq!(batches.len(), 1);
146+
let batch = &batches[0];
147+
assert_eq!(batch.num_rows(), 1);
148+
149+
let cnt = batch
150+
.column(0)
151+
.as_any()
152+
.downcast_ref::<Int64Array>()
153+
.expect("COUNT should return Int64")
154+
.value(0);
155+
assert_eq!(cnt, 0, "COUNT(*) on empty table should be 0");
156+
157+
Ok(())
158+
}

0 commit comments

Comments
 (0)