Skip to content

Commit 17024b2

Browse files
committed
refactor(cubesql): Extract next remapping in wrapper to separate struct
1 parent b3ec892 commit 17024b2

File tree

1 file changed

+127
-99
lines changed
  • rust/cubesql/cubesql/src/compile/engine/df

1 file changed

+127
-99
lines changed

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 127 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use serde::{Deserialize, Serialize};
3434
use std::{
3535
any::Any,
3636
cmp::min,
37-
collections::HashMap,
37+
collections::{HashMap, HashSet},
3838
convert::TryInto,
3939
fmt,
4040
future::Future,
@@ -252,14 +252,128 @@ impl CubeScanWrapperNode {
252252
}
253253
}
254254

255-
fn expr_name(e: &Expr, schema: &Arc<DFSchema>) -> Result<String> {
255+
fn expr_name(e: &Expr, schema: &DFSchema) -> Result<String> {
256256
match e {
257257
Expr::Column(col) => Ok(col.name.clone()),
258258
Expr::Sort { expr, .. } => expr_name(expr, schema),
259259
_ => e.name(schema),
260260
}
261261
}
262262

263+
/// Builds new column mapping
264+
/// One remapper serves one context: all unqualified columns with the same name are assumed to be the same column
265+
struct Remapper {
266+
from_alias: Option<String>,
267+
can_rename_columns: bool,
268+
remapping: HashMap<Column, Column>,
269+
used_targets: HashSet<String>,
270+
}
271+
272+
impl Remapper {
273+
/// Constructs new Remapper
274+
/// `from_alias` is used as the qualifier of columns after remapping
275+
/// When `can_rename_columns` is enabled, column names will be generated.
276+
/// When it's disabled, column names must stay the same.
277+
/// Column qualifiers can change in both cases.
278+
pub fn new(from_alias: Option<String>, can_rename_columns: bool) -> Self {
279+
Remapper {
280+
from_alias,
281+
can_rename_columns,
282+
283+
remapping: HashMap::new(),
284+
used_targets: HashSet::new(),
285+
}
286+
}
287+
288+
/// Generate new alias for expression
289+
/// `original_expr` is the one we are generating alias for
290+
/// `expr` can be the same or modified, e.g. when a previous column remapping has been applied.
291+
/// `expr` would be used to generate new alias when `can_rename_columns` is enabled.
292+
/// When `original_expr` is a column, both the unqualified and the qualified columns are remapped to the new alias
293+
pub fn add_expr(
294+
&mut self,
295+
schema: &DFSchema,
296+
original_expr: &Expr,
297+
expr: &Expr,
298+
) -> result::Result<String, CubeError> {
299+
static NON_ID_REGEX: LazyLock<Regex> =
300+
LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
301+
302+
let original_alias = expr_name(original_expr, schema)?;
303+
let original_alias_key = Column::from_name(&original_alias);
304+
if let Some(alias_column) = self.remapping.get(&original_alias_key) {
305+
return Ok(alias_column.name.clone());
306+
}
307+
308+
let alias = if self.can_rename_columns {
309+
let alias = expr_name(&expr, &schema)?;
310+
let mut truncated_alias = NON_ID_REGEX
311+
.replace_all(&alias, "_")
312+
.trim_start_matches("_")
313+
.to_lowercase();
314+
truncated_alias.truncate(16);
315+
let mut alias = truncated_alias.clone();
316+
for i in 1..10000 {
317+
if !self.used_targets.contains(&alias) {
318+
break;
319+
}
320+
alias = format!("{}_{}", truncated_alias, i);
321+
}
322+
alias
323+
} else {
324+
original_alias.clone()
325+
};
326+
327+
if self.used_targets.contains(&alias) {
328+
return Err(CubeError::internal(format!(
329+
"Can't generate SQL for column expr: duplicate alias {alias}"
330+
)));
331+
}
332+
333+
self.used_targets.insert(alias.clone());
334+
self.remapping
335+
.insert(original_alias_key, Column::from_name(&alias));
336+
if let Some(from_alias) = &self.from_alias {
337+
self.remapping.insert(
338+
Column {
339+
name: original_alias.clone(),
340+
relation: Some(from_alias.clone()),
341+
},
342+
Column {
343+
name: alias.clone(),
344+
relation: Some(from_alias.clone()),
345+
},
346+
);
347+
if let Expr::Column(column) = &original_expr {
348+
if let Some(original_relation) = &column.relation {
349+
if original_relation != from_alias {
350+
self.remapping.insert(
351+
Column {
352+
name: original_alias.clone(),
353+
relation: Some(original_relation.clone()),
354+
},
355+
Column {
356+
name: alias.clone(),
357+
relation: Some(from_alias.clone()),
358+
},
359+
);
360+
}
361+
}
362+
}
363+
}
364+
365+
Ok(alias)
366+
}
367+
368+
pub fn into_remapping(self) -> Option<HashMap<Column, Column>> {
369+
if self.remapping.len() > 0 {
370+
Some(self.remapping)
371+
} else {
372+
None
373+
}
374+
}
375+
}
376+
263377
pub struct SqlGenerationResult {
264378
pub data_source: Option<String>,
265379
pub from_alias: Option<String>,
@@ -625,8 +739,8 @@ impl CubeScanWrapperNode {
625739
subqueries_sql.insert(field.qualified_name(), sql_string);
626740
}
627741
let subqueries_sql = Arc::new(subqueries_sql);
628-
let mut next_remapping = HashMap::new();
629742
let alias = alias.or(from_alias.clone());
743+
let mut next_remapper = Remapper::new(alias.clone(), can_rename_columns);
630744
if let Some(data_source) = data_source {
631745
let generator = plan
632746
.meta
@@ -646,8 +760,7 @@ impl CubeScanWrapperNode {
646760
sql,
647761
generator.clone(),
648762
&column_remapping,
649-
&mut next_remapping,
650-
alias.clone(),
763+
&mut next_remapper,
651764
can_rename_columns,
652765
ungrouped_scan_node.clone(),
653766
subqueries_sql.clone(),
@@ -661,8 +774,7 @@ impl CubeScanWrapperNode {
661774
sql,
662775
generator.clone(),
663776
&column_remapping,
664-
&mut next_remapping,
665-
alias.clone(),
777+
&mut next_remapper,
666778
can_rename_columns,
667779
ungrouped_scan_node.clone(),
668780
subqueries_sql.clone(),
@@ -676,8 +788,7 @@ impl CubeScanWrapperNode {
676788
sql,
677789
generator.clone(),
678790
&column_remapping,
679-
&mut next_remapping,
680-
alias.clone(),
791+
&mut next_remapper,
681792
can_rename_columns,
682793
ungrouped_scan_node.clone(),
683794
subqueries_sql.clone(),
@@ -691,8 +802,7 @@ impl CubeScanWrapperNode {
691802
sql,
692803
generator.clone(),
693804
&column_remapping,
694-
&mut next_remapping,
695-
alias.clone(),
805+
&mut next_remapper,
696806
can_rename_columns,
697807
ungrouped_scan_node.clone(),
698808
subqueries_sql.clone(),
@@ -706,8 +816,7 @@ impl CubeScanWrapperNode {
706816
sql,
707817
generator.clone(),
708818
&column_remapping,
709-
&mut next_remapping,
710-
alias.clone(),
819+
&mut next_remapper,
711820
can_rename_columns,
712821
ungrouped_scan_node.clone(),
713822
subqueries_sql.clone(),
@@ -721,8 +830,7 @@ impl CubeScanWrapperNode {
721830
sql,
722831
generator.clone(),
723832
&column_remapping,
724-
&mut next_remapping,
725-
alias.clone(),
833+
&mut next_remapper,
726834
can_rename_columns,
727835
ungrouped_scan_node.clone(),
728836
subqueries_sql.clone(),
@@ -870,11 +978,7 @@ impl CubeScanWrapperNode {
870978
data_source: Some(data_source),
871979
from_alias: alias,
872980
sql: sql_response.sql,
873-
column_remapping: if next_remapping.len() > 0 {
874-
Some(next_remapping)
875-
} else {
876-
None
877-
},
981+
column_remapping: next_remapper.into_remapping(),
878982
request: load_request.clone(),
879983
})
880984
} else {
@@ -915,11 +1019,7 @@ impl CubeScanWrapperNode {
9151019
data_source: Some(data_source),
9161020
from_alias: alias,
9171021
sql,
918-
column_remapping: if next_remapping.len() > 0 {
919-
Some(next_remapping)
920-
} else {
921-
None
922-
},
1022+
column_remapping: next_remapper.into_remapping(),
9231023
request,
9241024
})
9251025
}
@@ -961,15 +1061,11 @@ impl CubeScanWrapperNode {
9611061
mut sql: SqlQuery,
9621062
generator: Arc<dyn SqlGenerator>,
9631063
column_remapping: &Option<HashMap<Column, Column>>,
964-
next_remapping: &mut HashMap<Column, Column>,
965-
from_alias: Option<String>,
1064+
next_remapper: &mut Remapper,
9661065
can_rename_columns: bool,
9671066
ungrouped_scan_node: Option<Arc<CubeScanNode>>,
9681067
subqueries: Arc<HashMap<String, String>>,
9691068
) -> result::Result<(Vec<AliasedColumn>, SqlQuery), CubeError> {
970-
static NON_ID_REGEX: LazyLock<Regex> =
971-
LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
972-
9731069
let mut aliased_columns = Vec::new();
9741070
for original_expr in exprs {
9751071
let expr = if let Some(column_remapping) = column_remapping.as_ref() {
@@ -1006,75 +1102,7 @@ impl CubeScanWrapperNode {
10061102
Self::escape_interpolation_quotes(expr_sql, ungrouped_scan_node.is_some());
10071103
sql = new_sql_query;
10081104

1009-
let original_alias = expr_name(&original_expr, &schema)?;
1010-
let original_alias_key = Column::from_name(&original_alias);
1011-
if let Some(alias_column) = next_remapping.get(&original_alias_key) {
1012-
let alias = alias_column.name.clone();
1013-
aliased_columns.push(AliasedColumn {
1014-
expr: expr_sql,
1015-
alias,
1016-
});
1017-
continue;
1018-
}
1019-
1020-
let alias = if can_rename_columns {
1021-
let alias = expr_name(&expr, &schema)?;
1022-
let mut truncated_alias = NON_ID_REGEX
1023-
.replace_all(&alias, "_")
1024-
.trim_start_matches("_")
1025-
.to_lowercase();
1026-
truncated_alias.truncate(16);
1027-
let mut alias = truncated_alias.clone();
1028-
for i in 1..10000 {
1029-
if !next_remapping
1030-
.iter()
1031-
.any(|(_, v)| v == &Column::from_name(&alias))
1032-
{
1033-
break;
1034-
}
1035-
alias = format!("{}_{}", truncated_alias, i);
1036-
}
1037-
alias
1038-
} else {
1039-
original_alias.clone()
1040-
};
1041-
if !next_remapping.contains_key(&Column::from_name(&alias)) {
1042-
next_remapping.insert(original_alias_key, Column::from_name(&alias));
1043-
if let Some(from_alias) = &from_alias {
1044-
next_remapping.insert(
1045-
Column {
1046-
name: original_alias.clone(),
1047-
relation: Some(from_alias.clone()),
1048-
},
1049-
Column {
1050-
name: alias.clone(),
1051-
relation: Some(from_alias.clone()),
1052-
},
1053-
);
1054-
if let Expr::Column(column) = &original_expr {
1055-
if let Some(original_relation) = &column.relation {
1056-
if original_relation != from_alias {
1057-
next_remapping.insert(
1058-
Column {
1059-
name: original_alias.clone(),
1060-
relation: Some(original_relation.clone()),
1061-
},
1062-
Column {
1063-
name: alias.clone(),
1064-
relation: Some(from_alias.clone()),
1065-
},
1066-
);
1067-
}
1068-
}
1069-
}
1070-
}
1071-
} else {
1072-
return Err(CubeError::internal(format!(
1073-
"Can't generate SQL for column expr: duplicate alias {}",
1074-
alias
1075-
)));
1076-
}
1077-
1105+
let alias = next_remapper.add_expr(&schema, &original_expr, &expr)?;
10781106
aliased_columns.push(AliasedColumn {
10791107
expr: expr_sql,
10801108
alias,

0 commit comments

Comments
 (0)