Skip to content

Commit 3171514

Browse files
committed
fix(cubesql): Implement remapping for CubeScan in wrapper
* Column names introduced by DataFusion are now renamed, which avoids sending overly long aliases to Cube for SQL generation and, later, to the data source * A single CubeScan can represent a join of multiple TableScans, which can have different table aliases, so columns on top of the CubeScan can have different qualifiers. But the generated SQL can have only one table alias, so all column expressions on top need to be remapped as well
1 parent c558b85 commit 3171514

File tree

1 file changed

+113
-67
lines changed
  • rust/cubesql/cubesql/src/compile/engine/df

1 file changed

+113
-67
lines changed

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 113 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -323,41 +323,33 @@ impl Remapper {
323323
}
324324
}
325325

326-
/// Generate new alias for expression
327-
/// `original_expr` is the one we are generating alias for
328-
/// `expr` can be same or modified, i.e. when previous column remapping is applied.
329-
/// `expr` would be used to generate new alias when `can_rename_columns` is enabled.
330-
/// When `original_expr` is column it would remap both unqualified and qualified colunms to new alias
331-
pub fn add_expr(
332-
&mut self,
333-
schema: &DFSchema,
334-
original_expr: &Expr,
335-
expr: &Expr,
336-
) -> result::Result<String, CubeError> {
326+
fn generate_new_alias(&self, start_from: String) -> String {
337327
static NON_ID_REGEX: LazyLock<Regex> =
338328
LazyLock::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
339329

340-
let original_alias = expr_name(original_expr, schema)?;
341-
let original_alias_key = Column::from_name(&original_alias);
342-
if let Some(alias_column) = self.remapping.get(&original_alias_key) {
343-
return Ok(alias_column.name.clone());
330+
let alias = start_from;
331+
let mut truncated_alias = NON_ID_REGEX
332+
.replace_all(&alias, "_")
333+
.trim_start_matches("_")
334+
.to_lowercase();
335+
truncated_alias.truncate(16);
336+
let mut alias = truncated_alias.clone();
337+
for i in 1..10000 {
338+
if !self.used_targets.contains(&alias) {
339+
break;
340+
}
341+
alias = format!("{}_{}", truncated_alias, i);
344342
}
343+
alias
344+
}
345345

346+
fn new_alias(
347+
&self,
348+
original_alias: &String,
349+
start_from: Option<String>,
350+
) -> result::Result<String, CubeError> {
346351
let alias = if self.can_rename_columns {
347-
let alias = expr_name(&expr, &schema)?;
348-
let mut truncated_alias = NON_ID_REGEX
349-
.replace_all(&alias, "_")
350-
.trim_start_matches("_")
351-
.to_lowercase();
352-
truncated_alias.truncate(16);
353-
let mut alias = truncated_alias.clone();
354-
for i in 1..10000 {
355-
if !self.used_targets.contains(&alias) {
356-
break;
357-
}
358-
alias = format!("{}_{}", truncated_alias, i);
359-
}
360-
alias
352+
self.generate_new_alias(start_from.unwrap_or_else(|| original_alias.clone()))
361353
} else {
362354
original_alias.clone()
363355
};
@@ -368,37 +360,77 @@ impl Remapper {
368360
)));
369361
}
370362

371-
self.used_targets.insert(alias.clone());
372-
self.remapping
373-
.insert(original_alias_key, Column::from_name(&alias));
363+
Ok(alias)
364+
}
365+
366+
fn insert_new_alias(&mut self, original_column: &Column, new_alias: &String) {
367+
self.used_targets.insert(new_alias.clone());
368+
self.remapping.insert(
369+
Column::from_name(&original_column.name),
370+
Column::from_name(new_alias),
371+
);
374372
if let Some(from_alias) = &self.from_alias {
375373
self.remapping.insert(
376374
Column {
377-
name: original_alias.clone(),
375+
name: original_column.name.clone(),
378376
relation: Some(from_alias.clone()),
379377
},
380378
Column {
381-
name: alias.clone(),
379+
name: new_alias.clone(),
382380
relation: Some(from_alias.clone()),
383381
},
384382
);
385-
if let Expr::Column(column) = &original_expr {
386-
if let Some(original_relation) = &column.relation {
387-
if original_relation != from_alias {
388-
self.remapping.insert(
389-
Column {
390-
name: original_alias.clone(),
391-
relation: Some(original_relation.clone()),
392-
},
393-
Column {
394-
name: alias.clone(),
395-
relation: Some(from_alias.clone()),
396-
},
397-
);
398-
}
383+
if let Some(original_relation) = &original_column.relation {
384+
if original_relation != from_alias {
385+
self.remapping.insert(
386+
original_column.clone(),
387+
Column {
388+
name: new_alias.clone(),
389+
relation: Some(from_alias.clone()),
390+
},
391+
);
399392
}
400393
}
401394
}
395+
}
396+
397+
pub fn add_column(&mut self, column: &Column) -> result::Result<String, CubeError> {
398+
if let Some(alias_column) = self.remapping.get(column) {
399+
return Ok(alias_column.name.clone());
400+
}
401+
402+
let new_alias = self.new_alias(&column.name, None)?;
403+
self.insert_new_alias(column, &new_alias);
404+
405+
Ok(new_alias)
406+
}
407+
408+
/// Generate new alias for expression
409+
/// `original_expr` is the one we are generating alias for
410+
/// `expr` can be same or modified, i.e. when previous column remapping is applied.
411+
/// `expr` would be used to generate new alias when `can_rename_columns` is enabled.
412+
/// When `original_expr` is column it would remap both unqualified and qualified colunms to new alias
413+
pub fn add_expr(
414+
&mut self,
415+
schema: &DFSchema,
416+
original_expr: &Expr,
417+
expr: &Expr,
418+
) -> result::Result<String, CubeError> {
419+
let original_alias = expr_name(original_expr, schema)?;
420+
let original_alias_key = Column::from_name(&original_alias);
421+
if let Some(alias_column) = self.remapping.get(&original_alias_key) {
422+
return Ok(alias_column.name.clone());
423+
}
424+
425+
let start_from = expr_name(&expr, &schema)?;
426+
let alias = self.new_alias(&original_alias, Some(start_from))?;
427+
428+
let original_column = if let Expr::Column(column) = &original_expr {
429+
column
430+
} else {
431+
&Column::from_name(original_alias)
432+
};
433+
self.insert_new_alias(original_column, &alias);
402434

403435
Ok(alias)
404436
}
@@ -617,38 +649,52 @@ impl CubeScanWrapperNode {
617649
}
618650
let mut meta_with_user = load_request_meta.as_ref().clone();
619651
meta_with_user.set_change_user(node.options.change_user.clone());
652+
653+
// Single CubeScan can represent join of multiple table scans
654+
// Multiple table scans can have multiple different aliases
655+
// It means that column expressions on top of this node can have multiple different qualifiers
656+
// CubeScan can have only one alias, so we remap every column to use that alias
657+
658+
// Columns in node.schema can have arbitratry names, assigned by DF
659+
// Stuff like `datetrunc(Utf8("month"), col)`
660+
// They can be very long, and contain unwanted character
661+
// So we rename them
662+
663+
let from_alias = node
664+
.schema
665+
.fields()
666+
.iter()
667+
.next()
668+
.and_then(|f| f.qualifier().cloned());
669+
let mut remapper = Remapper::new(from_alias.clone(), true);
670+
let mut member_to_alias = HashMap::new();
671+
for (member, field) in
672+
node.member_fields.iter().zip(node.schema.fields().iter())
673+
{
674+
let alias = remapper.add_column(&field.qualified_column())?;
675+
if let MemberField::Member(f) = member {
676+
member_to_alias.insert(f.to_string(), alias);
677+
}
678+
}
679+
let column_remapping = remapper.into_remapping();
680+
620681
let sql = transport
621682
.sql(
622683
node.span_id.clone(),
623684
node.request.clone(),
624685
node.auth_context,
625686
meta_with_user,
626-
Some(
627-
node.member_fields
628-
.iter()
629-
.zip(node.schema.fields().iter())
630-
.filter_map(|(m, field)| match m {
631-
MemberField::Member(f) => {
632-
Some((f.to_string(), field.name().to_string()))
633-
}
634-
_ => None,
635-
})
636-
.collect(),
637-
),
687+
Some(member_to_alias),
638688
None,
639689
)
640690
.await?;
691+
641692
// TODO Add wrapper for reprojection and literal members handling
642693
return Ok(SqlGenerationResult {
643694
data_source: Some(data_sources[0].clone()),
644-
from_alias: node
645-
.schema
646-
.fields()
647-
.iter()
648-
.next()
649-
.and_then(|f| f.qualifier().cloned()),
695+
from_alias,
650696
sql: sql.sql,
651-
column_remapping: None,
697+
column_remapping,
652698
request: node.request.clone(),
653699
});
654700
} else if let Some(WrappedSelectNode {

0 commit comments

Comments
 (0)