Skip to content

Commit 07a310f

Browse files
Support unparsing UNION for distinct results (apache#15814)
1 parent 03fa3b9 commit 07a310f

File tree

3 files changed

+44
-1
lines changed

3 files changed

+44
-1
lines changed

datafusion/sql/src/unparser/ast.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ pub struct QueryBuilder {
3232
fetch: Option<ast::Fetch>,
3333
locks: Vec<ast::LockClause>,
3434
for_clause: Option<ast::ForClause>,
35+
// If true, we need to unparse LogicalPlan::Union as a SQL `UNION` rather than a `UNION ALL`.
36+
distinct_union: bool,
3537
}
3638

3739
#[allow(dead_code)]
@@ -75,6 +77,13 @@ impl QueryBuilder {
7577
self.for_clause = value;
7678
self
7779
}
80+
pub fn distinct_union(&mut self) -> &mut Self {
81+
self.distinct_union = true;
82+
self
83+
}
84+
pub fn is_distinct_union(&self) -> bool {
85+
self.distinct_union
86+
}
7887
pub fn build(&self) -> Result<ast::Query, BuilderError> {
7988
let order_by = self
8089
.order_by_kind
@@ -112,6 +121,7 @@ impl QueryBuilder {
112121
fetch: Default::default(),
113122
locks: Default::default(),
114123
for_clause: Default::default(),
124+
distinct_union: false,
115125
}
116126
}
117127
}

datafusion/sql/src/unparser/plan.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,23 @@ impl Unparser<'_> {
545545
false,
546546
);
547547
}
548+
549+
// If this distinct is the parent of a Union and we're in a query context,
550+
// then we need to unparse as a `UNION` rather than a `UNION ALL`.
551+
if let Distinct::All(input) = distinct {
552+
if matches!(input.as_ref(), LogicalPlan::Union(_)) {
553+
if let Some(query_mut) = query.as_mut() {
554+
query_mut.distinct_union();
555+
return self.select_to_sql_recursively(
556+
input.as_ref(),
557+
query,
558+
select,
559+
relation,
560+
);
561+
}
562+
}
563+
}
564+
548565
let (select_distinct, input) = match distinct {
549566
Distinct::All(input) => (ast::Distinct::Distinct, input.as_ref()),
550567
Distinct::On(on) => {
@@ -829,14 +846,23 @@ impl Unparser<'_> {
829846
return internal_err!("UNION operator requires at least 2 inputs");
830847
}
831848

849+
let set_quantifier =
850+
if query.as_ref().is_some_and(|q| q.is_distinct_union()) {
851+
// Setting the SetQuantifier to None will unparse as a `UNION`
852+
// rather than a `UNION ALL`.
853+
ast::SetQuantifier::None
854+
} else {
855+
ast::SetQuantifier::All
856+
};
857+
832858
// Build the union expression tree bottom-up by reversing the order
833859
// note that we are also swapping left and right inputs because of the rev
834860
let union_expr = input_exprs
835861
.into_iter()
836862
.rev()
837863
.reduce(|a, b| SetExpr::SetOperation {
838864
op: ast::SetOperator::Union,
839-
set_quantifier: ast::SetQuantifier::All,
865+
set_quantifier,
840866
left: Box::new(b),
841867
right: Box::new(a),
842868
})

datafusion/sql/tests/cases/plan_to_sql.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,13 @@ fn roundtrip_statement() -> Result<()> {
170170
UNION ALL
171171
SELECT j3_string AS col1, j3_id AS id FROM j3
172172
) AS subquery GROUP BY col1, id ORDER BY col1 ASC, id ASC"#,
173+
r#"SELECT col1, id FROM (
174+
SELECT j1_string AS col1, j1_id AS id FROM j1
175+
UNION
176+
SELECT j2_string AS col1, j2_id AS id FROM j2
177+
UNION
178+
SELECT j3_string AS col1, j3_id AS id FROM j3
179+
) AS subquery ORDER BY col1 ASC, id ASC"#,
173180
"SELECT id, count(*) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
174181
last_name, sum(id) over (PARTITION BY first_name ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
175182
first_name from person",

0 commit comments

Comments
 (0)