Skip to content

Commit 982668b

Browse files
committed
Avoid API churn with project_schema
1 parent 2c0746c commit 982668b

File tree

10 files changed

+41
-35
lines changed

10 files changed

+41
-35
lines changed

datafusion-examples/examples/custom_data_source/custom_datasource.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,7 @@ impl CustomExec {
202202
schema: SchemaRef,
203203
db: CustomDataSource,
204204
) -> Self {
205-
let projected_schema =
206-
project_schema(&schema, projections.map(|v| v.as_ref())).unwrap();
205+
let projected_schema = project_schema(&schema, projections).unwrap();
207206
let cache = Self::compute_properties(projected_schema.clone());
208207
Self {
209208
db,

datafusion/catalog-listing/src/table.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ impl TableProvider for ListingTable {
522522

523523
// if no files need to be read, return an `EmptyExec`
524524
if partitioned_file_lists.is_empty() {
525-
let projected_schema = project_schema(&self.schema(), projection.as_deref())?;
525+
let projected_schema = project_schema(&self.schema(), projection.as_ref())?;
526526
return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema))));
527527
}
528528

datafusion/common/src/stats.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,12 @@ impl Statistics {
391391
/// For example, if we had statistics for columns `{"a", "b", "c"}`,
392392
/// projecting to `vec![2, 1]` would return statistics for columns `{"c",
393393
/// "b"}`.
394-
pub fn project(mut self, projection: Option<&[usize]>) -> Self {
395-
let Some(projection) = projection.map(AsRef::as_ref) else {
394+
pub fn project<P: AsRef<[usize]>>(self, p: Option<&P>) -> Self {
395+
self.project_inner(p.as_ref().map(|p| p.as_ref()))
396+
}
397+
398+
pub fn project_inner(mut self, projection: Option<&[usize]>) -> Self {
399+
let Some(projection) = projection else {
396400
return self;
397401
};
398402

@@ -1066,29 +1070,29 @@ mod tests {
10661070

10671071
#[test]
10681072
fn test_project_none() {
1069-
let projection: Option<&[usize]> = None;
1070-
let stats = make_stats(vec![10, 20, 30]).project(projection);
1073+
let projection: Option<Vec<usize>> = None;
1074+
let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
10711075
assert_eq!(stats, make_stats(vec![10, 20, 30]));
10721076
}
10731077

10741078
#[test]
10751079
fn test_project_empty() {
10761080
let projection = Some(vec![]);
1077-
let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref());
1081+
let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
10781082
assert_eq!(stats, make_stats(vec![]));
10791083
}
10801084

10811085
#[test]
10821086
fn test_project_swap() {
10831087
let projection = Some(vec![2, 1]);
1084-
let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref());
1088+
let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
10851089
assert_eq!(stats, make_stats(vec![30, 20]));
10861090
}
10871091

10881092
#[test]
10891093
fn test_project_repeated() {
10901094
let projection = Some(vec![1, 2, 1, 1, 0, 2]);
1091-
let stats = make_stats(vec![10, 20, 30]).project(projection.as_deref());
1095+
let stats = make_stats(vec![10, 20, 30]).project(projection.as_ref());
10921096
assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
10931097
}
10941098

datafusion/common/src/utils/mod.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ use std::thread::available_parallelism;
5959
///
6060
/// // Pick columns 'c' and 'b'
6161
/// let projection = Some(vec![2, 1]);
62-
/// let projected_schema = project_schema(&schema, projection.as_deref()).unwrap();
62+
/// let projected_schema = project_schema(&schema, projection.as_ref()).unwrap();
6363
///
6464
/// let expected_schema = SchemaRef::new(Schema::new(vec![
6565
/// Field::new("c", DataType::Utf8, true),
@@ -68,17 +68,34 @@ use std::thread::available_parallelism;
6868
///
6969
/// assert_eq!(projected_schema, expected_schema);
7070
/// ```
71-
pub fn project_schema(
71+
pub fn project_schema<P: AsRef<[usize]>>(
7272
schema: &SchemaRef,
73-
projection: Option<&[usize]>,
73+
projection: Option<P>,
7474
) -> Result<SchemaRef> {
75-
let schema = match projection.map(AsRef::as_ref) {
76-
Some(columns) => Arc::new(schema.project(columns)?),
75+
let schema = match projection {
76+
Some(columns) => Arc::new(schema.project(columns.as_ref())?),
7777
None => Arc::clone(schema),
7878
};
7979
Ok(schema)
8080
}
8181

82+
/// Trait for types that can be converted into a projection (potential subset of columns)
83+
pub trait AsProjection {
84+
/// Returns the projection as a slice of column indices, if applicable
85+
fn as_indices(&self) -> &[usize];
86+
}
87+
88+
impl AsProjection for &Vec<usize> {
89+
fn as_indices(&self) -> &[usize] {
90+
self.as_ref()
91+
}
92+
}
93+
impl AsProjection for &[usize] {
94+
fn as_indices(&self) -> &[usize] {
95+
self
96+
}
97+
}
98+
8299
/// Extracts a row at the specified index from a set of columns and stores it in the provided buffer.
83100
pub fn extract_row_at_idx_to_buf(
84101
columns: &[ArrayRef],

datafusion/core/src/datasource/empty.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ impl TableProvider for EmptyTable {
7777
_limit: Option<usize>,
7878
) -> Result<Arc<dyn ExecutionPlan>> {
7979
// even though there is no data, projections apply
80-
let projected_schema =
81-
project_schema(&self.schema, projection.map(AsRef::as_ref))?;
80+
let projected_schema = project_schema(&self.schema, projection)?;
8281
Ok(Arc::new(
8382
EmptyExec::new(projected_schema).with_partitions(self.partitions),
8483
))

datafusion/core/tests/custom_sources_cases/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ impl CustomExecutionPlan {
8686
fn new(projection: Option<Vec<usize>>) -> Self {
8787
let schema = TEST_CUSTOM_SCHEMA_REF!();
8888
let schema =
89-
project_schema(&schema, projection.as_deref()).expect("projected schema");
89+
project_schema(&schema, projection.as_ref()).expect("projected schema");
9090
let cache = Self::compute_properties(schema);
9191
Self { projection, cache }
9292
}

datafusion/core/tests/physical_optimizer/join_selection.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ async fn test_hash_join_swap_on_joins_with_projections(
762762
"ProjectionExec won't be added above if HashJoinExec contains embedded projection",
763763
);
764764

765-
assert_eq!(swapped_join.projection.as_ref().unwrap(), [0_usize]);
765+
assert_eq!(swapped_join.projection, Some(vec![0_usize]));
766766
assert_eq!(swapped.schema().fields.len(), 1);
767767
assert_eq!(swapped.schema().fields[0].name(), "small_col");
768768
Ok(())

datafusion/datasource/src/memory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ impl MemorySourceConfig {
262262
schema: SchemaRef,
263263
projection: Option<Vec<usize>>,
264264
) -> Result<Self> {
265-
let projected_schema = project_schema(&schema, projection.as_deref())?;
265+
let projected_schema = project_schema(&schema, projection.as_ref())?;
266266
Ok(Self {
267267
partitions: partitions.to_vec(),
268268
schema,

datafusion/physical-expr/src/projection.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -855,13 +855,13 @@ impl OptionProjectionRef {
855855
/// Applies an optional projection to a [`SchemaRef`], returning the
856856
/// projected schema.
857857
pub fn project_schema(&self, schema: &SchemaRef) -> Result<SchemaRef> {
858-
project_schema(schema, self.inner.as_deref())
858+
project_schema(schema, self.inner.as_ref())
859859
}
860860

861861
/// Applies an optional projection to a [`Statistics`], returning the
862862
/// projected stats.
863863
pub fn project_statistics(&self, stats: Statistics) -> Statistics {
864-
stats.project(self.inner.as_deref())
864+
stats.project(self.inner.as_ref())
865865
}
866866
}
867867

docs/source/library-user-guide/upgrading.md

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,6 @@
2323

2424
**Note:** DataFusion `53.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version.
2525

26-
### Schema, statistics project fn take an option slice instead of Vec ref
27-
28-
`project_schema` and `Statistics::project` now take `Option<&[usize]>` instead of `Option<&Vec<usize>>`.
29-
30-
To convert `Option<&Vec<usize>>` into `Option<&[usize]>` you can use `map(|v| v.as_ref())` call,
31-
for example:
32-
33-
```diff
34-
- let projected_schema = project_schema(&schema, projections)?;
35-
+ let projected_schema =
36-
+ project_schema(&schema, projections.map(|v| v.as_ref()))?;
37-
```
38-
3926
### `SimplifyInfo` trait removed, `SimplifyContext` now uses builder-style API
4027

4128
The `SimplifyInfo` trait has been removed and replaced with the concrete `SimplifyContext` struct. This simplifies the expression simplification API and removes the need for trait objects.

0 commit comments

Comments
 (0)