Skip to content

Commit 57d5ba3

Browse files
committed
feat: Use one table function per TPCH table
This commit makes the change to have one table function for each TPCH table we want to generate. The idea behind this is that table functions are scoped to a single table so we can have the granularity exposed to the user who wishes to generate a single table and at the same time expose a top level UDF that generates all the tables and builds the catalog implicitely for the user.
1 parent bebde7f commit 57d5ba3

File tree

1 file changed

+15
-21
lines changed

1 file changed

+15
-21
lines changed

src/lib.rs

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,26 @@ use datafusion_expr::Expr;
55
use std::sync::Arc;
66
use tpchgen_arrow::{NationArrow, RecordBatchIterator};
77

8-
/// A Table function that returns a table provider exposing the TPCH
9-
/// dataset.
10-
/// The function takes a single argument, which is an integer value
11-
/// representing the dataset scale factor.
8+
/// Table function that returns the TPCH nation table.
129
#[derive(Debug)]
13-
pub struct TpchgenFunction {}
10+
pub struct TpchNationFunction {}
1411

15-
impl TableFunctionImpl for TpchgenFunction {
16-
fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
17-
let Some(Expr::Literal(ScalarValue::Float64(Some(value)))) = exprs.get(0) else {
12+
impl TableFunctionImpl for TpchNationFunction {
13+
fn call(&self, args: &[Expr]) -> Result<Arc<dyn TableProvider>> {
14+
let Some(Expr::Literal(ScalarValue::Float64(Some(value)))) = args.get(0) else {
1815
return plan_err!("First argument must be a float literal.");
1916
};
2017

21-
// Create a list of tuples mapping the TPCH table names to their corresponding
22-
// schemas.
23-
let tables = vec![(
24-
"nation",
25-
tpchgen::generators::NationGenerator::new(*value, 0, 0),
26-
)];
18+
// Init the table generator.
19+
let tablegen = tpchgen::generators::NationGenerator::new(*value, 0, 0);
2720

28-
// Create a single RecordBatch with the value as a single column
29-
let mut table_arrow_gen = NationArrow::new(tables[0].1.clone());
30-
let batch = table_arrow_gen.next().unwrap();
21+
// Init the arrow provider.
22+
let mut arrow_tablegen = NationArrow::new(tablegen);
3123

32-
// Create a MemTable plan that returns the RecordBatch
33-
let provider = MemTable::try_new(table_arrow_gen.schema().clone(), vec![vec![batch]])?;
24+
let batch = arrow_tablegen.next().unwrap();
25+
26+
// Build the memtable plan.
27+
let provider = MemTable::try_new(arrow_tablegen.schema().clone(), vec![vec![batch]])?;
3428

3529
Ok(Arc::new(provider))
3630
}
@@ -44,10 +38,10 @@ mod tests {
4438
#[tokio::test]
4539
async fn test_tpchgen_function() -> Result<()> {
4640
let ctx = SessionContext::new();
47-
ctx.register_udtf("tpchgen", Arc::new(TpchgenFunction {}));
41+
ctx.register_udtf("tpchgen_nation", Arc::new(TpchNationFunction {}));
4842

4943
let df = ctx
50-
.sql("SELECT * FROM tpchgen(1.0)")
44+
.sql("SELECT * FROM tpchgen_nation(1.0)")
5145
.await?
5246
.collect()
5347
.await?;

0 commit comments

Comments
 (0)