Skip to content

Commit f52c56b

Browse files
authored
Support unparsing implicit lateral UNNEST plan to SQL text (#13824)
* support unparsing the implicit lateral unnest plan * cargo clippy and fmt * refactor for `check_unnest_placeholder_with_outer_ref` * add const for the prefix string of unnest and outer refernece column
1 parent 073a3b1 commit f52c56b

File tree

5 files changed

+181
-18
lines changed

5 files changed

+181
-18
lines changed

datafusion/expr/src/expr.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,14 +2536,19 @@ pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> {
25362536
Ok(s)
25372537
}
25382538

2539+
pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref";
2540+
pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST";
2541+
25392542
/// Format expressions for display as part of a logical plan. In many cases, this will produce
25402543
/// similar output to `Expr.name()` except that column names will be prefixed with '#'.
25412544
impl Display for Expr {
25422545
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
25432546
match self {
25442547
Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
25452548
Expr::Column(c) => write!(f, "{c}"),
2546-
Expr::OuterReferenceColumn(_, c) => write!(f, "outer_ref({c})"),
2549+
Expr::OuterReferenceColumn(_, c) => {
2550+
write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})")
2551+
}
25472552
Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
25482553
Expr::Literal(v) => write!(f, "{v:?}"),
25492554
Expr::Case(case) => {
@@ -2736,7 +2741,7 @@ impl Display for Expr {
27362741
},
27372742
Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"),
27382743
Expr::Unnest(Unnest { expr }) => {
2739-
write!(f, "UNNEST({expr})")
2744+
write!(f, "{UNNEST_COLUMN_PREFIX}({expr})")
27402745
}
27412746
}
27422747
}

datafusion/sql/src/unparser/plan.rs

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ use super::{
3333
Unparser,
3434
};
3535
use crate::unparser::ast::UnnestRelationBuilder;
36-
use crate::unparser::utils::unproject_agg_exprs;
36+
use crate::unparser::utils::{find_unnest_node_until_relation, unproject_agg_exprs};
3737
use crate::utils::UNNEST_PLACEHOLDER;
3838
use datafusion_common::{
3939
internal_err, not_impl_err,
4040
tree_node::{TransformedResult, TreeNode},
4141
Column, DataFusionError, Result, TableReference,
4242
};
43+
use datafusion_expr::expr::OUTER_REFERENCE_COLUMN_PREFIX;
4344
use datafusion_expr::{
4445
expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan,
4546
LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest,
@@ -235,9 +236,10 @@ impl Unparser<'_> {
235236
plan: &LogicalPlan,
236237
relation: &mut RelationBuilder,
237238
alias: Option<ast::TableAlias>,
239+
lateral: bool,
238240
) -> Result<()> {
239241
let mut derived_builder = DerivedRelationBuilder::default();
240-
derived_builder.lateral(false).alias(alias).subquery({
242+
derived_builder.lateral(lateral).alias(alias).subquery({
241243
let inner_statement = self.plan_to_sql(plan)?;
242244
if let ast::Statement::Query(inner_query) = inner_statement {
243245
inner_query
@@ -257,15 +259,17 @@ impl Unparser<'_> {
257259
alias: &str,
258260
plan: &LogicalPlan,
259261
relation: &mut RelationBuilder,
262+
lateral: bool,
260263
) -> Result<()> {
261264
if self.dialect.requires_derived_table_alias() {
262265
self.derive(
263266
plan,
264267
relation,
265268
Some(self.new_table_alias(alias.to_string(), vec![])),
269+
lateral,
266270
)
267271
} else {
268-
self.derive(plan, relation, None)
272+
self.derive(plan, relation, None, lateral)
269273
}
270274
}
271275

@@ -317,10 +321,12 @@ impl Unparser<'_> {
317321
// Projection can be top-level plan for unnest relation
318322
// The projection generated by the `RecursiveUnnestRewriter` from a UNNEST relation will have
319323
// only one expression, which is the placeholder column generated by the rewriter.
320-
if self.dialect.unnest_as_table_factor()
321-
&& p.expr.len() == 1
322-
&& Self::is_unnest_placeholder(&p.expr[0])
323-
{
324+
let unnest_input_type = if p.expr.len() == 1 {
325+
Self::check_unnest_placeholder_with_outer_ref(&p.expr[0])
326+
} else {
327+
None
328+
};
329+
if self.dialect.unnest_as_table_factor() && unnest_input_type.is_some() {
324330
if let LogicalPlan::Unnest(unnest) = &p.input.as_ref() {
325331
return self
326332
.unnest_to_table_factor_sql(unnest, query, select, relation);
@@ -333,6 +339,9 @@ impl Unparser<'_> {
333339
"derived_projection",
334340
plan,
335341
relation,
342+
unnest_input_type
343+
.filter(|t| matches!(t, UnnestInputType::OuterReference))
344+
.is_some(),
336345
);
337346
}
338347
self.reconstruct_select_statement(plan, p, select)?;
@@ -365,6 +374,7 @@ impl Unparser<'_> {
365374
"derived_limit",
366375
plan,
367376
relation,
377+
false,
368378
);
369379
}
370380
if let Some(fetch) = &limit.fetch {
@@ -402,6 +412,7 @@ impl Unparser<'_> {
402412
"derived_sort",
403413
plan,
404414
relation,
415+
false,
405416
);
406417
}
407418
let Some(query_ref) = query else {
@@ -472,6 +483,7 @@ impl Unparser<'_> {
472483
"derived_distinct",
473484
plan,
474485
relation,
486+
false,
475487
);
476488
}
477489
let (select_distinct, input) = match distinct {
@@ -658,6 +670,7 @@ impl Unparser<'_> {
658670
"derived_union",
659671
plan,
660672
relation,
673+
false,
661674
);
662675
}
663676

@@ -723,19 +736,54 @@ impl Unparser<'_> {
723736
internal_err!("Unnest input is not a Projection: {unnest:?}")
724737
}
725738
}
726-
_ => not_impl_err!("Unsupported operator: {plan:?}"),
739+
LogicalPlan::Subquery(subquery)
740+
if find_unnest_node_until_relation(subquery.subquery.as_ref())
741+
.is_some() =>
742+
{
743+
if self.dialect.unnest_as_table_factor() {
744+
self.select_to_sql_recursively(
745+
subquery.subquery.as_ref(),
746+
query,
747+
select,
748+
relation,
749+
)
750+
} else {
751+
self.derive_with_dialect_alias(
752+
"derived_unnest",
753+
subquery.subquery.as_ref(),
754+
relation,
755+
true,
756+
)
757+
}
758+
}
759+
_ => {
760+
not_impl_err!("Unsupported operator: {plan:?}")
761+
}
727762
}
728763
}
729764

730-
/// Try to find the placeholder column name generated by `RecursiveUnnestRewriter`
731-
/// Only match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`
732-
fn is_unnest_placeholder(expr: &Expr) -> bool {
765+
/// Try to find the placeholder column name generated by `RecursiveUnnestRewriter`.
766+
///
767+
/// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(...)"))`,
768+
/// it means it is a scalar column, return [UnnestInputType::Scalar].
769+
/// - If the column is a placeholder column match the pattern `Expr::Alias(Expr::Column("__unnest_placeholder(outer_ref(...)))")`,
770+
/// it means it is an outer reference column, return [UnnestInputType::OuterReference].
771+
/// - If the column is not a placeholder column, return [None].
772+
///
773+
/// `outer_ref` is the display result of [Expr::OuterReferenceColumn]
774+
fn check_unnest_placeholder_with_outer_ref(expr: &Expr) -> Option<UnnestInputType> {
733775
if let Expr::Alias(Alias { expr, .. }) = expr {
734776
if let Expr::Column(Column { name, .. }) = expr.as_ref() {
735-
return name.starts_with(UNNEST_PLACEHOLDER);
777+
if let Some(prefix) = name.strip_prefix(UNNEST_PLACEHOLDER) {
778+
if prefix.starts_with(&format!("({}(", OUTER_REFERENCE_COLUMN_PREFIX))
779+
{
780+
return Some(UnnestInputType::OuterReference);
781+
}
782+
return Some(UnnestInputType::Scalar);
783+
}
736784
}
737785
}
738-
false
786+
None
739787
}
740788

741789
fn unnest_to_table_factor_sql(
@@ -1092,3 +1140,12 @@ impl From<BuilderError> for DataFusionError {
10921140
DataFusionError::External(Box::new(e))
10931141
}
10941142
}
1143+
1144+
/// The type of the input to the UNNEST table factor.
1145+
#[derive(Debug)]
1146+
enum UnnestInputType {
1147+
/// The input is a column reference. It will be presented like `outer_ref(column_name)`.
1148+
OuterReference,
1149+
/// The input is a scalar value. It will be presented like a scalar array or struct.
1150+
Scalar,
1151+
}

datafusion/sql/src/unparser/rewrite.rs

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use datafusion_common::{
2323
tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
2424
Column, HashMap, Result, TableReference,
2525
};
26-
use datafusion_expr::expr::Alias;
26+
use datafusion_expr::expr::{Alias, UNNEST_COLUMN_PREFIX};
2727
use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
2828
use sqlparser::ast::Ident;
2929

@@ -190,10 +190,11 @@ pub(super) fn rewrite_plan_for_sort_on_non_projected_fields(
190190
}
191191
}
192192

193-
/// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of
194-
/// subquery
193+
/// This logic is to work out the columns and inner query for SubqueryAlias plan for some types of
194+
/// subquery or unnest
195195
/// - `(SELECT column_a as a from table) AS A`
196196
/// - `(SELECT column_a from table) AS A (a)`
197+
/// - `SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1)` (see [find_unnest_column_alias])
197198
///
198199
/// A roundtrip example for table alias with columns
199200
///
@@ -222,6 +223,15 @@ pub(super) fn subquery_alias_inner_query_and_columns(
222223
) -> (&LogicalPlan, Vec<Ident>) {
223224
let plan: &LogicalPlan = subquery_alias.input.as_ref();
224225

226+
if let LogicalPlan::Subquery(subquery) = plan {
227+
let (inner_projection, Some(column)) =
228+
find_unnest_column_alias(subquery.subquery.as_ref())
229+
else {
230+
return (plan, vec![]);
231+
};
232+
return (inner_projection, vec![Ident::new(column)]);
233+
}
234+
225235
let LogicalPlan::Projection(outer_projections) = plan else {
226236
return (plan, vec![]);
227237
};
@@ -257,6 +267,48 @@ pub(super) fn subquery_alias_inner_query_and_columns(
257267
(outer_projections.input.as_ref(), columns)
258268
}
259269

270+
/// Try to find the column alias for UNNEST in the inner projection.
271+
/// For example:
272+
/// ```sql
273+
/// SELECT * FROM t1 CROSS JOIN UNNEST(t1.c1) AS u(c1)
274+
/// ```
275+
/// The above query will be parsed into the following plan:
276+
/// ```text
277+
/// Projection: *
278+
/// Cross Join:
279+
/// SubqueryAlias: t1
280+
/// TableScan: t
281+
/// SubqueryAlias: u
282+
/// Subquery:
283+
/// Projection: UNNEST(outer_ref(t1.c1)) AS c1
284+
/// Projection: __unnest_placeholder(outer_ref(t1.c1),depth=1) AS UNNEST(outer_ref(t1.c1))
285+
/// Unnest: lists[__unnest_placeholder(outer_ref(t1.c1))|depth=1] structs[]
286+
/// Projection: outer_ref(t1.c1) AS __unnest_placeholder(outer_ref(t1.c1))
287+
/// EmptyRelation
288+
/// ```
289+
/// The function will return the inner projection and the column alias `c1` if the column name
290+
/// starts with `UNNEST(` (the `Display` result of [Expr::Unnest]) in the inner projection.
291+
pub(super) fn find_unnest_column_alias(
292+
plan: &LogicalPlan,
293+
) -> (&LogicalPlan, Option<String>) {
294+
if let LogicalPlan::Projection(projection) = plan {
295+
if projection.expr.len() != 1 {
296+
return (plan, None);
297+
}
298+
if let Some(Expr::Alias(alias)) = projection.expr.first() {
299+
if alias
300+
.expr
301+
.schema_name()
302+
.to_string()
303+
.starts_with(&format!("{UNNEST_COLUMN_PREFIX}("))
304+
{
305+
return (projection.input.as_ref(), Some(alias.name.clone()));
306+
}
307+
}
308+
}
309+
(plan, None)
310+
}
311+
260312
/// Injects column aliases into a subquery's logical plan. The function searches for a `Projection`
261313
/// within the given plan, which may be wrapped by other operators (e.g., LIMIT, SORT).
262314
/// If the top-level plan is a `Projection`, it directly injects the column aliases.

datafusion/sql/src/unparser/utils.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,31 @@ pub(crate) fn find_unnest_node_within_select(plan: &LogicalPlan) -> Option<&Unne
8989
}
9090
}
9191

92+
/// Recursively searches children of [LogicalPlan] to find Unnest node if exist
93+
/// until encountering a Relation node with single input
94+
pub(crate) fn find_unnest_node_until_relation(plan: &LogicalPlan) -> Option<&Unnest> {
95+
// Note that none of the nodes that have a corresponding node can have more
96+
// than 1 input node. E.g. Projection / Filter always have 1 input node.
97+
let input = plan.inputs();
98+
let input = if input.len() > 1 {
99+
return None;
100+
} else {
101+
input.first()?
102+
};
103+
104+
if let LogicalPlan::Unnest(unnest) = input {
105+
Some(unnest)
106+
} else if let LogicalPlan::TableScan(_) = input {
107+
None
108+
} else if let LogicalPlan::Subquery(_) = input {
109+
None
110+
} else if let LogicalPlan::SubqueryAlias(_) = input {
111+
None
112+
} else {
113+
find_unnest_node_within_select(input)
114+
}
115+
}
116+
92117
/// Recursively searches children of [LogicalPlan] to find Window nodes if exist
93118
/// prior to encountering a Join, TableScan, or a nested subquery (derived table factor).
94119
/// If Window node is not found prior to this or at all before reaching the end

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,30 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
615615
parser_dialect: Box::new(GenericDialect {}),
616616
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
617617
},
618+
TestStatementWithDialect {
619+
sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)",
620+
expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col)"#,
621+
parser_dialect: Box::new(GenericDialect {}),
622+
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
623+
},
624+
TestStatementWithDialect {
625+
sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)",
626+
expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN UNNEST(u.array_col) AS t1 (c1)"#,
627+
parser_dialect: Box::new(GenericDialect {}),
628+
unparser_dialect: Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()),
629+
},
630+
TestStatementWithDialect {
631+
sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col)",
632+
expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))")"#,
633+
parser_dialect: Box::new(GenericDialect {}),
634+
unparser_dialect: Box::new(UnparserDefaultDialect {}),
635+
},
636+
TestStatementWithDialect {
637+
sql: "SELECT * FROM unnest_table u, UNNEST(u.array_col) AS t1 (c1)",
638+
expected: r#"SELECT * FROM unnest_table AS u CROSS JOIN LATERAL (SELECT UNNEST(u.array_col) AS "UNNEST(outer_ref(u.array_col))") AS t1 (c1)"#,
639+
parser_dialect: Box::new(GenericDialect {}),
640+
unparser_dialect: Box::new(UnparserDefaultDialect {}),
641+
},
618642
];
619643

620644
for query in tests {

0 commit comments

Comments
 (0)