Skip to content

Commit 2bf1da5

Browse files
authored
feat: add union by name to duckdb (#773)
cc @betolink
1 parent c411904 commit 2bf1da5

File tree

2 files changed

+35
-9
lines changed

2 files changed

+35
-9
lines changed

crates/duckdb/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1212
- Conditionally disable parsing the WKB ([#635](https://github.com/stac-utils/rustac/pull/635))
1313
- `Client.extensions` ([#665](https://github.com/stac-utils/rustac/pull/665))
1414
- Filtering ([#699](https://github.com/stac-utils/rustac/pull/699))
15+
- `union_by_name`, on by default ([#773](https://github.com/stac-utils/rustac/pull/773))
1516

1617
### Removed
1718

crates/duckdb/src/client.rs

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@ pub const DEFAULT_USE_HIVE_PARTITIONING: bool = false;
1515
/// Default convert wkb value.
1616
pub const DEFAULT_CONVERT_WKB: bool = true;
1717

18-
const DEFAULT_COLLECTION_DESCRIPTION: &str =
18+
/// The default collection description.
19+
pub const DEFAULT_COLLECTION_DESCRIPTION: &str =
1920
"Auto-generated collection from stac-geoparquet extents";
2021

22+
/// The default union by name value.
23+
pub const DEFAULT_UNION_BY_NAME: bool = true;
24+
2125
/// A client for making DuckDB requests for STAC objects.
2226
#[derive(Debug)]
2327
pub struct Client {
@@ -30,6 +34,11 @@ pub struct Client {
3034
///
3135
/// If false, WKB metadata will be added.
3236
pub convert_wkb: bool,
37+
38+
/// Whether to use `union_by_name` when querying.
39+
///
40+
/// Defaults to true.
41+
pub union_by_name: bool,
3342
}
3443

3544
impl Client {
@@ -359,14 +368,16 @@ impl Client {
359368
}
360369

361370
fn format_parquet_href(&self, href: &str) -> String {
362-
if self.use_hive_partitioning {
363-
format!(
364-
"read_parquet('{}', filename=true, hive_partitioning=1)",
365-
href
366-
)
367-
} else {
368-
format!("read_parquet('{}', filename=true)", href)
369-
}
371+
format!(
372+
"read_parquet('{}', filename=true, hive_partitioning={}, union_by_name={})",
373+
href,
374+
if self.use_hive_partitioning {
375+
"true"
376+
} else {
377+
"false"
378+
},
379+
if self.union_by_name { "true" } else { "false" }
380+
)
370381
}
371382
}
372383

@@ -413,6 +424,7 @@ impl From<Connection> for Client {
413424
connection,
414425
use_hive_partitioning: DEFAULT_USE_HIVE_PARTITIONING,
415426
convert_wkb: DEFAULT_CONVERT_WKB,
427+
union_by_name: DEFAULT_UNION_BY_NAME,
416428
}
417429
}
418430
}
@@ -672,4 +684,17 @@ mod tests {
672684
.unwrap();
673685
assert_eq!(item_collection.items.len(), 100);
674686
}
687+
688+
#[rstest]
689+
fn union_by_name(client: Client) {
690+
let _ = client.search("data/*.parquet", Default::default()).unwrap();
691+
}
692+
693+
#[rstest]
694+
fn no_union_by_name(mut client: Client) {
695+
client.union_by_name = false;
696+
let _ = client
697+
.search("data/*.parquet", Default::default())
698+
.unwrap_err();
699+
}
675700
}

0 commit comments

Comments
 (0)