Skip to content

Commit 2b05b09

Browse files
AryanBagade and alamb authored
feat: Add builder API for CreateExternalTable to reduce verbosity (#19066)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #19039. ## Rationale for this change As per the Issue #19039, many examples of verbose `CreateExternalTable` initialization were found throughout the codebase. The current approach requires specifying all 14 fields even when most use default values, making it: 1. Verbose (most fields are defaults) 2. Hard to see which fields are actually overridden vs defaults This PR implements a builder API to address these issues. ## What changes are included in this PR? - Added `CreateExternalTableBuilder` with required parameters enforced at compile-time - Constructor takes required fields: `builder(name, location, file_type, schema)` - Optional fields can be set via `.with_*()` methods (following DataFusion's builder conventions) - Updated all usages across 5 files to use the new builder API - Reduced typical usage from 15 lines to 4-6 lines Example transformation: **Before** ```rust // Before (15 lines) CreateExternalTable { name, location, file_type: "parquet".to_string(), schema: Arc::new(DFSchema::empty()), table_partition_cols: vec![], if_not_exists: false, or_replace: false, temporary: false, definition: None, order_exprs: vec![], unbounded: false, options: HashMap::new(), constraints: Default::default(), column_defaults: HashMap::new(), } ``` **After** ```rust // After (4 lines) CreateExternalTable::builder(name, location, "parquet", schema) .build() ``` ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. 
Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Yup, all existing tests pass: - cargo test -p datafusion-expr - DDL tests pass - cargo test -p datafusion-catalog - Catalog tests pass - cargo test -p datafusion - Core tests pass - All modified usage sites are in existing test files ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> No breaking changes. <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> The builder API is new, and it is intended for internal code cleanup and for reducing redundancy and verbosity. --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 482c6b8 commit 2b05b09

File tree

5 files changed

+245
-184
lines changed

5 files changed

+245
-184
lines changed

datafusion/catalog/src/listing_schema.rs

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,22 +127,13 @@ impl ListingSchemaProvider {
127127
.factory
128128
.create(
129129
state,
130-
&CreateExternalTable {
131-
schema: Arc::new(DFSchema::empty()),
130+
&CreateExternalTable::builder(
132131
name,
133-
location: table_url,
134-
file_type: self.format.clone(),
135-
table_partition_cols: vec![],
136-
if_not_exists: false,
137-
or_replace: false,
138-
temporary: false,
139-
definition: None,
140-
order_exprs: vec![],
141-
unbounded: false,
142-
options: Default::default(),
143-
constraints: Default::default(),
144-
column_defaults: Default::default(),
145-
},
132+
table_url,
133+
self.format.clone(),
134+
Arc::new(DFSchema::empty()),
135+
)
136+
.build(),
146137
)
147138
.await?;
148139
let _ =

datafusion/core/src/datasource/listing_table_factory.rs

Lines changed: 58 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ mod tests {
231231
use std::path::PathBuf;
232232

233233
use datafusion_common::parsers::CompressionTypeVariant;
234-
use datafusion_common::{Constraints, DFSchema, TableReference};
234+
use datafusion_common::{DFSchema, TableReference};
235235

236236
#[tokio::test]
237237
async fn test_create_using_non_std_file_ext() {
@@ -245,22 +245,14 @@ mod tests {
245245
let context = SessionContext::new();
246246
let state = context.state();
247247
let name = TableReference::bare("foo");
248-
let cmd = CreateExternalTable {
248+
let cmd = CreateExternalTable::builder(
249249
name,
250-
location: csv_file.path().to_str().unwrap().to_string(),
251-
file_type: "csv".to_string(),
252-
schema: Arc::new(DFSchema::empty()),
253-
table_partition_cols: vec![],
254-
if_not_exists: false,
255-
or_replace: false,
256-
temporary: false,
257-
definition: None,
258-
order_exprs: vec![],
259-
unbounded: false,
260-
options: HashMap::from([("format.has_header".into(), "true".into())]),
261-
constraints: Constraints::default(),
262-
column_defaults: HashMap::new(),
263-
};
250+
csv_file.path().to_str().unwrap().to_string(),
251+
"csv",
252+
Arc::new(DFSchema::empty()),
253+
)
254+
.with_options(HashMap::from([("format.has_header".into(), "true".into())]))
255+
.build();
264256
let table_provider = factory.create(&state, &cmd).await.unwrap();
265257
let listing_table = table_provider
266258
.as_any()
@@ -286,22 +278,14 @@ mod tests {
286278
let mut options = HashMap::new();
287279
options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned());
288280
options.insert("format.has_header".into(), "true".into());
289-
let cmd = CreateExternalTable {
281+
let cmd = CreateExternalTable::builder(
290282
name,
291-
location: csv_file.path().to_str().unwrap().to_string(),
292-
file_type: "csv".to_string(),
293-
schema: Arc::new(DFSchema::empty()),
294-
table_partition_cols: vec![],
295-
if_not_exists: false,
296-
or_replace: false,
297-
temporary: false,
298-
definition: None,
299-
order_exprs: vec![],
300-
unbounded: false,
301-
options,
302-
constraints: Constraints::default(),
303-
column_defaults: HashMap::new(),
304-
};
283+
csv_file.path().to_str().unwrap().to_string(),
284+
"csv",
285+
Arc::new(DFSchema::empty()),
286+
)
287+
.with_options(options)
288+
.build();
305289
let table_provider = factory.create(&state, &cmd).await.unwrap();
306290
let listing_table = table_provider
307291
.as_any()
@@ -331,22 +315,14 @@ mod tests {
331315
options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned());
332316
options.insert("format.has_header".into(), "true".into());
333317
options.insert("format.compression".into(), "gzip".into());
334-
let cmd = CreateExternalTable {
318+
let cmd = CreateExternalTable::builder(
335319
name,
336-
location: dir.path().to_str().unwrap().to_string(),
337-
file_type: "csv".to_string(),
338-
schema: Arc::new(DFSchema::empty()),
339-
table_partition_cols: vec![],
340-
if_not_exists: false,
341-
or_replace: false,
342-
temporary: false,
343-
definition: None,
344-
order_exprs: vec![],
345-
unbounded: false,
346-
options,
347-
constraints: Constraints::default(),
348-
column_defaults: HashMap::new(),
349-
};
320+
dir.path().to_str().unwrap().to_string(),
321+
"csv",
322+
Arc::new(DFSchema::empty()),
323+
)
324+
.with_options(options)
325+
.build();
350326
let table_provider = factory.create(&state, &cmd).await.unwrap();
351327
let listing_table = table_provider
352328
.as_any()
@@ -383,22 +359,14 @@ mod tests {
383359
let mut options = HashMap::new();
384360
options.insert("format.schema_infer_max_rec".to_owned(), "1000".to_owned());
385361
options.insert("format.has_header".into(), "true".into());
386-
let cmd = CreateExternalTable {
362+
let cmd = CreateExternalTable::builder(
387363
name,
388-
location: dir.path().to_str().unwrap().to_string(),
389-
file_type: "csv".to_string(),
390-
schema: Arc::new(DFSchema::empty()),
391-
table_partition_cols: vec![],
392-
if_not_exists: false,
393-
or_replace: false,
394-
temporary: false,
395-
definition: None,
396-
order_exprs: vec![],
397-
unbounded: false,
398-
options,
399-
constraints: Constraints::default(),
400-
column_defaults: HashMap::new(),
401-
};
364+
dir.path().to_str().unwrap().to_string(),
365+
"csv",
366+
Arc::new(DFSchema::empty()),
367+
)
368+
.with_options(options)
369+
.build();
402370
let table_provider = factory.create(&state, &cmd).await.unwrap();
403371
let listing_table = table_provider
404372
.as_any()
@@ -427,22 +395,13 @@ mod tests {
427395
let state = context.state();
428396
let name = TableReference::bare("foo");
429397

430-
let cmd = CreateExternalTable {
398+
let cmd = CreateExternalTable::builder(
431399
name,
432-
location: String::from(path.to_str().unwrap()),
433-
file_type: "parquet".to_string(),
434-
schema: Arc::new(DFSchema::empty()),
435-
table_partition_cols: vec![],
436-
if_not_exists: false,
437-
or_replace: false,
438-
temporary: false,
439-
definition: None,
440-
order_exprs: vec![],
441-
unbounded: false,
442-
options: HashMap::new(),
443-
constraints: Constraints::default(),
444-
column_defaults: HashMap::new(),
445-
};
400+
String::from(path.to_str().unwrap()),
401+
"parquet",
402+
Arc::new(DFSchema::empty()),
403+
)
404+
.build();
446405
let table_provider = factory.create(&state, &cmd).await.unwrap();
447406
let listing_table = table_provider
448407
.as_any()
@@ -467,22 +426,13 @@ mod tests {
467426
let state = context.state();
468427
let name = TableReference::bare("foo");
469428

470-
let cmd = CreateExternalTable {
429+
let cmd = CreateExternalTable::builder(
471430
name,
472-
location: dir.path().to_str().unwrap().to_string(),
473-
file_type: "parquet".to_string(),
474-
schema: Arc::new(DFSchema::empty()),
475-
table_partition_cols: vec![],
476-
if_not_exists: false,
477-
or_replace: false,
478-
temporary: false,
479-
definition: None,
480-
order_exprs: vec![],
481-
unbounded: false,
482-
options: HashMap::new(),
483-
constraints: Constraints::default(),
484-
column_defaults: HashMap::new(),
485-
};
431+
dir.path().to_str().unwrap(),
432+
"parquet",
433+
Arc::new(DFSchema::empty()),
434+
)
435+
.build();
486436
let table_provider = factory.create(&state, &cmd).await.unwrap();
487437
let listing_table = table_provider
488438
.as_any()
@@ -508,22 +458,13 @@ mod tests {
508458
let state = context.state();
509459
let name = TableReference::bare("foo");
510460

511-
let cmd = CreateExternalTable {
461+
let cmd = CreateExternalTable::builder(
512462
name,
513-
location: dir.path().to_str().unwrap().to_string(),
514-
file_type: "parquet".to_string(),
515-
schema: Arc::new(DFSchema::empty()),
516-
table_partition_cols: vec![],
517-
if_not_exists: false,
518-
or_replace: false,
519-
temporary: false,
520-
definition: None,
521-
order_exprs: vec![],
522-
unbounded: false,
523-
options: HashMap::new(),
524-
constraints: Constraints::default(),
525-
column_defaults: HashMap::new(),
526-
};
463+
dir.path().to_str().unwrap().to_string(),
464+
"parquet",
465+
Arc::new(DFSchema::empty()),
466+
)
467+
.build();
527468
let table_provider = factory.create(&state, &cmd).await.unwrap();
528469
let listing_table = table_provider
529470
.as_any()
@@ -558,22 +499,13 @@ mod tests {
558499
let state = context.state();
559500
let name = TableReference::bare("test");
560501

561-
let cmd = CreateExternalTable {
502+
let cmd = CreateExternalTable::builder(
562503
name,
563-
location: location.clone(),
564-
file_type: "parquet".to_string(),
565-
schema: Arc::new(DFSchema::empty()),
566-
table_partition_cols: vec![],
567-
if_not_exists: false,
568-
or_replace: false,
569-
temporary: false,
570-
definition: None,
571-
order_exprs: vec![],
572-
unbounded: false,
573-
options: HashMap::new(),
574-
constraints: Constraints::default(),
575-
column_defaults: HashMap::new(),
576-
};
504+
location.clone(),
505+
"parquet",
506+
Arc::new(DFSchema::empty()),
507+
)
508+
.build();
577509

578510
let _table_provider = factory.create(&state, &cmd).await.unwrap();
579511

@@ -597,22 +529,13 @@ mod tests {
597529
let state = context.state();
598530
let name = TableReference::bare("test");
599531

600-
let cmd = CreateExternalTable {
532+
let cmd = CreateExternalTable::builder(
601533
name,
602534
location,
603-
file_type: "parquet".to_string(),
604-
schema: Arc::new(DFSchema::empty()),
605-
table_partition_cols: vec![],
606-
if_not_exists: false,
607-
or_replace: false,
608-
temporary: false,
609-
definition: None,
610-
order_exprs: vec![],
611-
unbounded: false,
612-
options: HashMap::new(),
613-
constraints: Constraints::default(),
614-
column_defaults: HashMap::new(),
615-
};
535+
"parquet",
536+
Arc::new(DFSchema::empty()),
537+
)
538+
.build();
616539

617540
let _table_provider = factory.create(&state, &cmd).await.unwrap();
618541

0 commit comments

Comments
 (0)