Skip to content

Commit aba6430

Browse files
authored
fix(cubesql): Generate proper projection wrapper for duplicated members in CubeScanNode (#9233)
`CubeScanWrapperNode::generate_sql_for_node` does not handle duplicated members in `member_fields`. It results in missing realiasing projection, as it generate only one column for each member. This introduces realiasing to make those columns reappear. `CubeScanExecutionPlan` implements this by iterating over `member_fields` while building columns, so each member in `member_fields` will get it’s own column in DF anyway.
1 parent 31a7a04 commit aba6430

File tree

4 files changed

+125
-5
lines changed

4 files changed

+125
-5
lines changed

packages/cubejs-testing/test/__snapshots__/smoke-cubesql.test.ts.snap

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,13 @@ Array [
378378
},
379379
]
380380
`;
381+
382+
exports[`SQL API Postgres (Data) wrapper with duplicated members: wrapper-duplicated-members 1`] = `
383+
Array [
384+
Object {
385+
"bar": "shipped",
386+
"bar_expr": "0",
387+
"foo": "shipped",
388+
},
389+
]
390+
`;

packages/cubejs-testing/test/smoke-cubesql.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,5 +660,40 @@ filter_subq AS (
660660
const res = await connection.query(query);
661661
expect(res.rows).toMatchSnapshot('query-view-deep-joins');
662662
});
663+
664+
test('wrapper with duplicated members', async () => {
665+
const query = `
666+
SELECT
667+
"foo",
668+
"bar",
669+
CASE
670+
WHEN "bar" = 'new'
671+
THEN 1
672+
ELSE 0
673+
END
674+
AS "bar_expr"
675+
FROM (
676+
SELECT
677+
"rows"."foo" AS "foo",
678+
"rows"."bar" AS "bar"
679+
FROM (
680+
SELECT
681+
"status" AS "foo",
682+
"status" AS "bar"
683+
FROM Orders
684+
) "rows"
685+
GROUP BY
686+
"foo",
687+
"bar"
688+
) "_"
689+
ORDER BY
690+
"bar_expr"
691+
LIMIT 1
692+
;
693+
`;
694+
695+
const res = await connection.query(query);
696+
expect(res.rows).toMatchSnapshot('wrapper-duplicated-members');
697+
});
663698
});
664699
});

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
3535
use std::{
3636
any::Any,
3737
cmp::min,
38-
collections::{HashMap, HashSet},
38+
collections::{hash_map::Entry, HashMap, HashSet},
3939
convert::TryInto,
4040
fmt,
4141
future::Future,
@@ -733,6 +733,7 @@ impl CubeScanWrapperNode {
733733
// And tuning literals to measures would require ugly wrapping with noop aggregation function
734734
// TODO investigate Cube.js joins, try to implement dimension member expression
735735
let mut has_literal_members = false;
736+
let mut has_duplicated_members = false;
736737
let mut wrapper_exprs = vec![];
737738

738739
for (member, field) in
@@ -741,9 +742,18 @@ impl CubeScanWrapperNode {
741742
let alias = remapper.add_column(&field.qualified_column())?;
742743
let expr = match member {
743744
MemberField::Member(f) => {
744-
member_to_alias.insert(f.to_string(), alias.clone());
745-
// `alias` is column name that would be generated by Cube.js, just reference that
746-
Expr::Column(Column::from_name(alias.clone()))
745+
match member_to_alias.entry(f.to_string()) {
746+
Entry::Vacant(entry) => {
747+
entry.insert(alias.clone());
748+
// `alias` is column name that would be generated by Cube.js, just reference that
749+
Expr::Column(Column::from_name(alias.clone()))
750+
}
751+
Entry::Occupied(entry) => {
752+
// This member already has an alias, generate wrapper that would use it
753+
has_duplicated_members = true;
754+
Expr::Column(Column::from_name(entry.get().clone()))
755+
}
756+
}
747757
}
748758
MemberField::Literal(value) => {
749759
has_literal_members = true;
@@ -770,7 +780,7 @@ impl CubeScanWrapperNode {
770780
.await?;
771781

772782
// TODO is this check necessary?
773-
let sql = if has_literal_members {
783+
let sql = if has_literal_members || has_duplicated_members {
774784
// Need to generate wrapper SELECT with literal columns
775785
// Generated columns need to have same aliases as targets in `remapper`
776786
// Because that's what plans higher up would use in generated SQL

rust/cubesql/cubesql/src/compile/test/test_wrapper.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,3 +1456,68 @@ WHERE
14561456
assert_eq!(request.measures.unwrap().len(), 1);
14571457
assert_eq!(request.dimensions.unwrap().len(), 0);
14581458
}
1459+
1460+
#[tokio::test]
1461+
async fn wrapper_duplicated_members() {
1462+
if !Rewriter::sql_push_down_enabled() {
1463+
return;
1464+
}
1465+
init_testing_logger();
1466+
1467+
let query_plan = convert_select_to_query_plan(
1468+
// language=PostgreSQL
1469+
format!(
1470+
r#"
1471+
SELECT
1472+
"foo",
1473+
"bar",
1474+
CASE
1475+
WHEN "bar" IS NOT NULL
1476+
THEN 1
1477+
ELSE 0
1478+
END
1479+
AS "bar_expr"
1480+
FROM (
1481+
SELECT
1482+
"rows"."foo" AS "foo",
1483+
"rows"."bar" AS "bar"
1484+
FROM (
1485+
SELECT
1486+
"dim_str0" AS "foo",
1487+
"dim_str0" AS "bar"
1488+
FROM MultiTypeCube
1489+
) "rows"
1490+
GROUP BY
1491+
"foo",
1492+
"bar"
1493+
) "_"
1494+
ORDER BY
1495+
"bar_expr"
1496+
LIMIT 1
1497+
;
1498+
"#
1499+
)
1500+
.to_string(),
1501+
DatabaseProtocol::PostgreSQL,
1502+
)
1503+
.await;
1504+
1505+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
1506+
println!(
1507+
"Physical plan: {}",
1508+
displayable(physical_plan.as_ref()).indent()
1509+
);
1510+
1511+
let logical_plan = query_plan.as_logical_plan();
1512+
// Generated SQL should contain realiasing of one member to two columns
1513+
assert!(logical_plan
1514+
.find_cube_scan_wrapped_sql()
1515+
.wrapped_sql
1516+
.sql
1517+
.contains(r#""foo" "foo""#));
1518+
assert!(logical_plan
1519+
.find_cube_scan_wrapped_sql()
1520+
.wrapped_sql
1521+
.sql
1522+
.contains(r#""foo" "bar""#));
1523+
}

0 commit comments

Comments
 (0)