Skip to content

Commit 2626fc2

Browse files
nuno-faria and alamb authored
fix: Ensure column names do not change with expand_views_at_output (#19019)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. -->

- Closes #18818.

## Rationale for this change

<!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

Make the schema consistent whether `datafusion.optimizer.expand_views_at_output` is on or off:

```sql
> create table t(a int, b varchar);
> set datafusion.optimizer.expand_views_at_output = true;

-- before
> select * from t;
+---+-----+
| a | t.b |
+---+-----+
+---+-----+

-- now
> select * from t;
+---+---+
| a | b |
+---+---+
+---+---+
```

## What changes are included in this PR?

<!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. -->

- Added an alias when converting expressions.
- Added sqllogictests.

## Are these changes tested?

<!-- We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

Yes.

## Are there any user-facing changes?

<!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. -->

The column names produced when `datafusion.optimizer.expand_views_at_output` is enabled are now the same as when it is disabled.

<!-- If there are any breaking changes to public APIs, please add the `api change` label. -->

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 4ddee14 commit 2626fc2

File tree

4 files changed

+77
-33
lines changed

4 files changed

+77
-33
lines changed

datafusion/expr/src/expr_rewriter/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,11 @@ fn coerce_exprs_for_schema(
260260
}
261261
#[expect(deprecated)]
262262
Expr::Wildcard { .. } => Ok(expr),
263-
_ => expr.cast_to(new_type, src_schema),
263+
_ => {
264+
// maintain the original name when casting
265+
let name = dst_schema.field(idx).name();
266+
Ok(expr.cast_to(new_type, src_schema)?.alias(name))
267+
}
264268
}
265269
} else {
266270
Ok(expr)

datafusion/optimizer/src/analyzer/type_coercion.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,7 +1305,7 @@ mod test {
13051305
true,
13061306
plan.clone(),
13071307
@r"
1308-
Projection: CAST(a AS LargeUtf8)
1308+
Projection: CAST(a AS LargeUtf8) AS a
13091309
EmptyRelation: rows=0
13101310
"
13111311
)?;
@@ -1341,7 +1341,7 @@ mod test {
13411341
true,
13421342
plan.clone(),
13431343
@r"
1344-
Projection: CAST(a AS LargeUtf8)
1344+
Projection: CAST(a AS LargeUtf8) AS a
13451345
EmptyRelation: rows=0
13461346
"
13471347
)?;
@@ -1371,7 +1371,7 @@ mod test {
13711371
true,
13721372
sort_plan.clone(),
13731373
@r"
1374-
Projection: CAST(a AS LargeUtf8)
1374+
Projection: CAST(a AS LargeUtf8) AS a
13751375
Sort: a ASC NULLS FIRST
13761376
Projection: a
13771377
EmptyRelation: rows=0
@@ -1400,7 +1400,7 @@ mod test {
14001400
true,
14011401
plan.clone(),
14021402
@r"
1403-
Projection: CAST(a AS LargeUtf8)
1403+
Projection: CAST(a AS LargeUtf8) AS a
14041404
Sort: a ASC NULLS FIRST
14051405
Projection: a
14061406
EmptyRelation: rows=0
@@ -1436,7 +1436,7 @@ mod test {
14361436
true,
14371437
plan.clone(),
14381438
@r"
1439-
Projection: CAST(a AS LargeBinary)
1439+
Projection: CAST(a AS LargeBinary) AS a
14401440
EmptyRelation: rows=0
14411441
"
14421442
)?;
@@ -1493,7 +1493,7 @@ mod test {
14931493
true,
14941494
sort_plan.clone(),
14951495
@r"
1496-
Projection: CAST(a AS LargeBinary)
1496+
Projection: CAST(a AS LargeBinary) AS a
14971497
Sort: a ASC NULLS FIRST
14981498
Projection: a
14991499
EmptyRelation: rows=0
@@ -1524,7 +1524,7 @@ mod test {
15241524
true,
15251525
plan.clone(),
15261526
@r"
1527-
Projection: CAST(a AS LargeBinary)
1527+
Projection: CAST(a AS LargeBinary) AS a
15281528
Sort: a ASC NULLS FIRST
15291529
Projection: a
15301530
EmptyRelation: rows=0

datafusion/optimizer/tests/optimizer_integration.rs

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -538,14 +538,15 @@ fn recursive_cte_projection_pushdown() -> Result<()> {
538538
// columns from the base table and recursive table, eliminating unused columns
539539
assert_snapshot!(
540540
format!("{plan}"),
541-
@r#"SubqueryAlias: nodes
542-
RecursiveQuery: is_distinct=false
543-
Projection: test.col_int32 AS id
544-
TableScan: test projection=[col_int32]
545-
Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32)
546-
Filter: nodes.id < Int32(3)
547-
TableScan: nodes projection=[id]
548-
"#
541+
@r"
542+
SubqueryAlias: nodes
543+
RecursiveQuery: is_distinct=false
544+
Projection: test.col_int32 AS id
545+
TableScan: test projection=[col_int32]
546+
Projection: CAST(CAST(nodes.id AS Int64) + Int64(1) AS Int32) AS id
547+
Filter: nodes.id < Int32(3)
548+
TableScan: nodes projection=[id]
549+
"
549550
);
550551
Ok(())
551552
}
@@ -561,14 +562,16 @@ fn recursive_cte_with_aliased_self_reference() -> Result<()> {
561562

562563
assert_snapshot!(
563564
format!("{plan}"),
564-
@r#"SubqueryAlias: nodes
565-
RecursiveQuery: is_distinct=false
566-
Projection: test.col_int32 AS id
567-
TableScan: test projection=[col_int32]
568-
Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32)
569-
SubqueryAlias: child
570-
Filter: nodes.id < Int32(3)
571-
TableScan: nodes projection=[id]"#,
565+
@r"
566+
SubqueryAlias: nodes
567+
RecursiveQuery: is_distinct=false
568+
Projection: test.col_int32 AS id
569+
TableScan: test projection=[col_int32]
570+
Projection: CAST(CAST(child.id AS Int64) + Int64(1) AS Int32) AS id
571+
SubqueryAlias: child
572+
Filter: nodes.id < Int32(3)
573+
TableScan: nodes projection=[id]
574+
",
572575
);
573576
Ok(())
574577
}
@@ -620,15 +623,16 @@ fn recursive_cte_projection_pushdown_baseline() -> Result<()> {
620623
// and only the needed column is selected from the recursive table
621624
assert_snapshot!(
622625
format!("{plan}"),
623-
@r#"SubqueryAlias: countdown
624-
RecursiveQuery: is_distinct=false
625-
Projection: test.col_int32 AS n
626-
Filter: test.col_int32 = Int32(5)
627-
TableScan: test projection=[col_int32]
628-
Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32)
629-
Filter: countdown.n > Int32(1)
630-
TableScan: countdown projection=[n]
631-
"#
626+
@r"
627+
SubqueryAlias: countdown
628+
RecursiveQuery: is_distinct=false
629+
Projection: test.col_int32 AS n
630+
Filter: test.col_int32 = Int32(5)
631+
TableScan: test projection=[col_int32]
632+
Projection: CAST(CAST(countdown.n AS Int64) - Int64(1) AS Int32) AS n
633+
Filter: countdown.n > Int32(1)
634+
TableScan: countdown projection=[n]
635+
"
632636
);
633637
Ok(())
634638
}

datafusion/sqllogictest/test_files/cast.slt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,39 @@ select * from t0 where v0<1e100;
8989

9090
statement ok
9191
drop table t0;
92+
93+
94+
# ensure that automatically casting with "datafusion.optimizer.expand_views_at_output" does not
95+
# change the column name
96+
97+
statement ok
98+
create table t(a int, b varchar);
99+
100+
statement ok
101+
set datafusion.optimizer.expand_views_at_output = true;
102+
103+
query TT
104+
explain select * from t;
105+
----
106+
logical_plan
107+
01)Projection: t.a, CAST(t.b AS LargeUtf8) AS b
108+
02)--TableScan: t projection=[a, b]
109+
physical_plan
110+
01)ProjectionExec: expr=[a@0 as a, CAST(b@1 AS LargeUtf8) as b]
111+
02)--DataSourceExec: partitions=1, partition_sizes=[0]
112+
113+
query TT
114+
explain select b from t;
115+
----
116+
logical_plan
117+
01)Projection: CAST(t.b AS LargeUtf8) AS b
118+
02)--TableScan: t projection=[b]
119+
physical_plan
120+
01)ProjectionExec: expr=[CAST(b@0 AS LargeUtf8) as b]
121+
02)--DataSourceExec: partitions=1, partition_sizes=[0]
122+
123+
statement ok
124+
set datafusion.optimizer.expand_views_at_output = false;
125+
126+
statement ok
127+
drop table t;

0 commit comments

Comments
 (0)