Skip to content

Commit 4f8f7de

Browse files
committed
feat: Allow repeated aliases (auto-realias)
Signed-off-by: Alex Qyoun-ae <[email protected]>
1 parent 016c22f commit 4f8f7de

File tree

3 files changed

+186
-35
lines changed

3 files changed

+186
-35
lines changed

datafusion/core/src/logical_plan/builder.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ use crate::logical_plan::{
5151
CrossJoin, DFField, DFSchema, DFSchemaRef, Limit, Partitioning, Repartition,
5252
SubqueryType, Values,
5353
};
54-
use crate::sql::utils::{group_window_expr_by_sort_keys, resolve_exprs_to_aliases};
54+
use crate::sql::utils::{
55+
group_window_expr_by_sort_keys, realias_duplicate_expr_aliases,
56+
resolve_exprs_to_aliases,
57+
};
5558

5659
/// Default table name for unnamed table
5760
pub const UNNAMED_TABLE: &str = "?table?";
@@ -1222,6 +1225,13 @@ pub fn project_with_alias(
12221225
.push(columnize_expr(normalize_col(e, &plan)?, input_schema)),
12231226
}
12241227
}
1228+
1229+
// NOTE (cubesql): realias expressions that have the same name and qualifier
1230+
if alias.is_some() {
1231+
projected_expr =
1232+
realias_duplicate_expr_aliases(projected_expr, input_schema, alias.clone())?;
1233+
}
1234+
12251235
validate_unique_names("Projections", projected_expr.iter(), input_schema)?;
12261236
let input_schema = DFSchema::new_with_metadata(
12271237
exprlist_to_fields(&projected_expr, &plan)?,

datafusion/core/src/sql/planner.rs

Lines changed: 84 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ use crate::logical_plan::{
3838
use crate::optimizer::utils::exprlist_to_columns;
3939
use crate::prelude::JoinType;
4040
use crate::scalar::ScalarValue;
41-
use crate::sql::utils::{find_udtf_exprs, make_decimal_type, normalize_ident};
41+
use crate::sql::utils::{
42+
find_udtf_exprs, make_decimal_type, normalize_ident, realias_duplicate_expr_aliases,
43+
};
4244
use crate::{
4345
error::{DataFusionError, Result},
4446
physical_plan::aggregates,
@@ -1038,6 +1040,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
10381040
);
10391041
}
10401042

1043+
// NOTE (cubesql): realias expressions that have the same name and qualifier
1044+
let select_exprs =
1045+
realias_duplicate_expr_aliases(select_exprs, plan.schema(), None)?;
1046+
10411047
// having and group by clause may reference aliases defined in select projection
10421048
let projected_plan = self.project(plan.clone(), select_exprs.clone())?;
10431049

@@ -3191,21 +3197,35 @@ mod tests {
31913197

31923198
#[test]
31933199
fn select_repeated_column() {
3194-
let sql = "SELECT age, age FROM person";
3195-
let err = logical_plan(sql).expect_err("query should have failed");
3196-
assert_eq!(
3197-
r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3198-
format!("{:?}", err)
3200+
// let sql = "SELECT age, age FROM person";
3201+
// let err = logical_plan(sql).expect_err("query should have failed");
3202+
// assert_eq!(
3203+
// r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 0 and \"#person.age\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3204+
// format!("{:?}", err)
3205+
// );
3206+
3207+
// NOTE: this is supported with cubesql patches
3208+
quick_test(
3209+
"SELECT age, age FROM person",
3210+
"Projection: #person.age, #person.age AS age__1\
3211+
\n TableScan: person projection=None",
31993212
);
32003213
}
32013214

32023215
#[test]
32033216
fn select_wildcard_with_repeated_column() {
3204-
let sql = "SELECT *, age FROM person";
3205-
let err = logical_plan(sql).expect_err("query should have failed");
3206-
assert_eq!(
3207-
r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3208-
format!("{:?}", err)
3217+
// let sql = "SELECT *, age FROM person";
3218+
// let err = logical_plan(sql).expect_err("query should have failed");
3219+
// assert_eq!(
3220+
// r##"Plan("Projections require unique expression names but the expression \"#person.age\" at position 3 and \"#person.age\" at position 8 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3221+
// format!("{:?}", err)
3222+
// );
3223+
3224+
// NOTE: this is supported with cubesql patches
3225+
quick_test(
3226+
"SELECT *, age FROM person",
3227+
"Projection: #person.id, #person.first_name, #person.last_name, #person.age, #person.state, #person.salary, #person.birth_date, #person.😀, #person.age AS age__1\
3228+
\n TableScan: person projection=None",
32093229
);
32103230
}
32113231

@@ -3779,11 +3799,19 @@ mod tests {
37793799

37803800
#[test]
37813801
fn select_simple_aggregate_repeated_aggregate() {
3782-
let sql = "SELECT MIN(age), MIN(age) FROM person";
3783-
let err = logical_plan(sql).expect_err("query should have failed");
3784-
assert_eq!(
3785-
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3786-
format!("{:?}", err)
3802+
// let sql = "SELECT MIN(age), MIN(age) FROM person";
3803+
// let err = logical_plan(sql).expect_err("query should have failed");
3804+
// assert_eq!(
3805+
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 0 and \"MIN(#person.age)\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3806+
// format!("{:?}", err)
3807+
// );
3808+
3809+
// NOTE: this is supported with cubesql patches
3810+
quick_test(
3811+
"SELECT MIN(age), MIN(age) FROM person",
3812+
"Projection: #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\
3813+
\n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\
3814+
\n TableScan: person projection=None",
37873815
);
37883816
}
37893817

@@ -3809,11 +3837,19 @@ mod tests {
38093837

38103838
#[test]
38113839
fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() {
3812-
let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
3813-
let err = logical_plan(sql).expect_err("query should have failed");
3814-
assert_eq!(
3815-
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3816-
format!("{:?}", err)
3840+
// let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
3841+
// let err = logical_plan(sql).expect_err("query should have failed");
3842+
// assert_eq!(
3843+
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age) AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3844+
// format!("{:?}", err)
3845+
// );
3846+
3847+
// NOTE: this is supported with cubesql patches
3848+
quick_test(
3849+
"SELECT MIN(age) AS a, MIN(age) AS a FROM person",
3850+
"Projection: #MIN(person.age) AS a, #MIN(person.age) AS a__1\
3851+
\n Aggregate: groupBy=[[]], aggr=[[MIN(#person.age)]]\
3852+
\n TableScan: person projection=None",
38173853
);
38183854
}
38193855

@@ -3839,11 +3875,19 @@ mod tests {
38393875

38403876
#[test]
38413877
fn select_simple_aggregate_with_groupby_with_aliases_repeated() {
3842-
let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
3843-
let err = logical_plan(sql).expect_err("query should have failed");
3844-
assert_eq!(
3845-
r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3846-
format!("{:?}", err)
3878+
// let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
3879+
// let err = logical_plan(sql).expect_err("query should have failed");
3880+
// assert_eq!(
3881+
// r##"Plan("Projections require unique expression names but the expression \"#person.state AS a\" at position 0 and \"MIN(#person.age) AS a\" at position 1 have the same name. Consider aliasing (\"AS\") one of them.")"##,
3882+
// format!("{:?}", err)
3883+
// );
3884+
3885+
// NOTE: this is supported with cubesql patches
3886+
quick_test(
3887+
"SELECT state AS a, MIN(age) AS a FROM person GROUP BY state",
3888+
"Projection: #person.state AS a, #MIN(person.age) AS a__1\
3889+
\n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\
3890+
\n TableScan: person projection=None",
38473891
);
38483892
}
38493893

@@ -4008,12 +4052,20 @@ mod tests {
40084052

40094053
#[test]
40104054
fn select_simple_aggregate_with_groupby_aggregate_repeated() {
4011-
let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
4012-
let err = logical_plan(sql).expect_err("query should have failed");
4013-
assert_eq!(
4014-
r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##,
4015-
format!("{:?}", err)
4016-
);
4055+
// let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
4056+
// let err = logical_plan(sql).expect_err("query should have failed");
4057+
// assert_eq!(
4058+
// r##"Plan("Projections require unique expression names but the expression \"MIN(#person.age)\" at position 1 and \"MIN(#person.age)\" at position 2 have the same name. Consider aliasing (\"AS\") one of them.")"##,
4059+
// format!("{:?}", err)
4060+
// );
4061+
4062+
// NOTE: this is supported with cubesql patches
4063+
quick_test(
4064+
"SELECT state, MIN(age), MIN(age) FROM person GROUP BY state",
4065+
"Projection: #person.state, #MIN(person.age), #MIN(person.age) AS MIN(person.age)__1\
4066+
\n Aggregate: groupBy=[[#person.state]], aggr=[[MIN(#person.age)]]\
4067+
\n TableScan: person projection=None",
4068+
)
40174069
}
40184070

40194071
#[test]

datafusion/core/src/sql/utils.rs

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,16 @@ use arrow::datatypes::{DataType, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION};
2121
use datafusion_common::DFSchema;
2222
use sqlparser::ast::Ident;
2323

24-
use crate::logical_plan::ExprVisitable;
2524
use crate::logical_plan::{Expr, Like, LogicalPlan};
25+
use crate::logical_plan::{ExprSchemable, ExprVisitable};
2626
use crate::scalar::ScalarValue;
2727
use crate::{
2828
error::{DataFusionError, Result},
2929
logical_plan::{Column, ExpressionVisitor, Recursion},
3030
};
3131
use datafusion_expr::expr::GroupingSet;
32-
use std::collections::HashMap;
32+
use std::collections::{HashMap, HashSet};
33+
use std::mem::replace;
3334

3435
/// Collect all deeply nested `Expr::AggregateFunction` and
3536
/// `Expr::AggregateUDF`. They are returned in order of occurrence (depth
@@ -781,6 +782,94 @@ pub(crate) fn normalize_ident(id: Ident) -> String {
781782
id.value
782783
}
783784

785+
/// Structure holding qualifier and name of an alias.
786+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
787+
struct QualifiedAlias {
788+
qualifier: Option<String>,
789+
name: String,
790+
}
791+
792+
impl QualifiedAlias {
793+
fn new(qualifier: Option<String>, name: String) -> Self {
794+
Self { qualifier, name }
795+
}
796+
797+
fn from_expr_schema_and_alias(
798+
expr: &Expr,
799+
schema: &DFSchema,
800+
alias: Option<String>,
801+
) -> Result<Self> {
802+
let field = expr.to_field(schema)?;
803+
let qualifier = alias.or_else(|| field.qualifier().cloned());
804+
let name = field.name().clone();
805+
Ok(Self::new(qualifier, name))
806+
}
807+
808+
fn with_name(&self, name: &str) -> Self {
809+
Self {
810+
qualifier: self.qualifier.clone(),
811+
name: name.to_string(),
812+
}
813+
}
814+
}
815+
816+
/// Realias duplicate expression aliases in the provided list of expressions.
817+
pub(crate) fn realias_duplicate_expr_aliases(
818+
mut exprs: Vec<Expr>,
819+
schema: &DFSchema,
820+
alias: Option<String>,
821+
) -> Result<Vec<Expr>> {
822+
// Two-pass algorithm is used: first collect all the aliases and indices of repeated aliases,
823+
// then realias the collected indices on the second pass.
824+
// This is to avoid realiasing to a name that is valid but is used by another expression
825+
// that was not originally processed.
826+
let mut aliases = HashSet::new();
827+
let mut indices_to_realias = vec![];
828+
for (index, expr) in exprs.iter().enumerate() {
829+
let qualified_alias =
830+
QualifiedAlias::from_expr_schema_and_alias(expr, schema, alias.clone())?;
831+
let is_duplicate = !aliases.insert(qualified_alias);
832+
if is_duplicate {
833+
indices_to_realias.push(index);
834+
}
835+
}
836+
const MAX_SUFFIX_LIMIT: usize = 100;
837+
'outer: for index in indices_to_realias {
838+
let qualified_alias = QualifiedAlias::from_expr_schema_and_alias(
839+
&exprs[index],
840+
schema,
841+
alias.clone(),
842+
)?;
843+
for suffix in 1..=MAX_SUFFIX_LIMIT {
844+
let new_name = format!("{}__{}", qualified_alias.name, suffix);
845+
let new_qualified_alias = qualified_alias.with_name(&new_name);
846+
let is_duplicate = !aliases.insert(new_qualified_alias);
847+
if !is_duplicate {
848+
set_expr_alias(&mut exprs[index], new_name);
849+
continue 'outer;
850+
}
851+
}
852+
return Err(DataFusionError::Internal(format!(
853+
"Unable to realias duplicate expression alias: {:?}",
854+
exprs[index]
855+
)));
856+
}
857+
Ok(exprs)
858+
}
859+
860+
/// Set an alias for an expression, replacing an existing alias or adding one if necessary.
861+
fn set_expr_alias(expr: &mut Expr, alias: String) {
862+
match expr {
863+
Expr::Alias(_, name) => {
864+
*name = alias;
865+
}
866+
_ => {
867+
// Expr::Wildcard is simply a placeholder to please borrow checker
868+
*expr = Expr::Alias(Box::new(replace(expr, Expr::Wildcard)), alias);
869+
}
870+
}
871+
}
872+
784873
#[cfg(test)]
785874
mod tests {
786875
use super::*;

0 commit comments

Comments
 (0)