Skip to content

Commit 6483f66

Browse files
authored
fix(cubesql): Fix SELECT DISTINCT on pushdown (#9144)
* add test_select_distinct_dimensions
* started doing transforming_rewrite → "select-distinct-dimensions"
* fix tests
* wip: implement self.select_distinct_dimensions()
* fix tests
* some improvements
* implement select_distinct_dimensions
* adding tests
* refactor a bit
* improve select_distinct_dimensions()
* more tests
* more cases covered in select_distinct_dimensions
* improve
* some improvements
1 parent 60cff30 commit 6483f66

File tree

2 files changed

+282
-5
lines changed

2 files changed

+282
-5
lines changed

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 182 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8205,6 +8205,184 @@ ORDER BY "source"."str0" ASC
82058205
)
82068206
}
82078207

8208+
#[tokio::test]
8209+
async fn test_select_distinct_dimensions() {
8210+
if !Rewriter::sql_push_down_enabled() {
8211+
return;
8212+
}
8213+
init_testing_logger();
8214+
8215+
let logical_plan = convert_select_to_query_plan(
8216+
"SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce".to_string(),
8217+
DatabaseProtocol::PostgreSQL,
8218+
)
8219+
.await
8220+
.as_logical_plan();
8221+
8222+
println!("logical_plan: {:?}", logical_plan);
8223+
8224+
assert_eq!(
8225+
logical_plan.find_cube_scan().request,
8226+
V1LoadRequestQuery {
8227+
measures: Some(vec![]),
8228+
dimensions: Some(vec![
8229+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8230+
]),
8231+
segments: Some(vec![]),
8232+
order: Some(vec![]),
8233+
..Default::default()
8234+
}
8235+
);
8236+
8237+
let logical_plan = convert_select_to_query_plan(
8238+
"SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100".to_string(),
8239+
DatabaseProtocol::PostgreSQL,
8240+
)
8241+
.await
8242+
.as_logical_plan();
8243+
8244+
println!("logical_plan: {:?}", logical_plan);
8245+
8246+
assert_eq!(
8247+
logical_plan.find_cube_scan().request,
8248+
V1LoadRequestQuery {
8249+
measures: Some(vec![]),
8250+
dimensions: Some(vec![
8251+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8252+
]),
8253+
segments: Some(vec![]),
8254+
order: Some(vec![]),
8255+
limit: Some(100),
8256+
..Default::default()
8257+
}
8258+
);
8259+
8260+
let logical_plan = convert_select_to_query_plan(
8261+
"SELECT DISTINCT * FROM (SELECT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100) q_0".to_string(),
8262+
DatabaseProtocol::PostgreSQL,
8263+
)
8264+
.await
8265+
.as_logical_plan();
8266+
8267+
println!("logical_plan: {:?}", logical_plan);
8268+
8269+
assert_eq!(
8270+
logical_plan.find_cube_scan().request,
8271+
V1LoadRequestQuery {
8272+
measures: Some(vec![]),
8273+
dimensions: Some(vec![
8274+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8275+
]),
8276+
segments: Some(vec![]),
8277+
order: Some(vec![]),
8278+
limit: Some(100),
8279+
ungrouped: Some(true),
8280+
..Default::default()
8281+
}
8282+
);
8283+
8284+
let logical_plan = convert_select_to_query_plan(
8285+
"SELECT DISTINCT customer_gender, order_date FROM KibanaSampleDataEcommerce"
8286+
.to_string(),
8287+
DatabaseProtocol::PostgreSQL,
8288+
)
8289+
.await
8290+
.as_logical_plan();
8291+
8292+
println!("logical_plan: {:?}", logical_plan);
8293+
8294+
assert_eq!(
8295+
logical_plan.find_cube_scan().request,
8296+
V1LoadRequestQuery {
8297+
measures: Some(vec![]),
8298+
dimensions: Some(vec![
8299+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8300+
"KibanaSampleDataEcommerce.order_date".to_string(),
8301+
]),
8302+
segments: Some(vec![]),
8303+
order: Some(vec![]),
8304+
..Default::default()
8305+
}
8306+
);
8307+
8308+
let logical_plan = convert_select_to_query_plan(
8309+
"SELECT DISTINCT MAX(maxPrice) FROM KibanaSampleDataEcommerce".to_string(),
8310+
DatabaseProtocol::PostgreSQL,
8311+
)
8312+
.await
8313+
.as_logical_plan();
8314+
8315+
println!("logical_plan: {:?}", logical_plan);
8316+
8317+
assert_eq!(
8318+
logical_plan.find_cube_scan().request,
8319+
V1LoadRequestQuery {
8320+
measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string(),]),
8321+
dimensions: Some(vec![]),
8322+
segments: Some(vec![]),
8323+
order: Some(vec![]),
8324+
..Default::default()
8325+
}
8326+
);
8327+
8328+
let logical_plan = convert_select_to_query_plan(
8329+
"SELECT DISTINCT * FROM (SELECT customer_gender, MAX(maxPrice) FROM KibanaSampleDataEcommerce GROUP BY 1) q_0".to_string(),
8330+
DatabaseProtocol::PostgreSQL,
8331+
)
8332+
.await
8333+
.as_logical_plan();
8334+
8335+
println!("logical_plan: {:?}", logical_plan);
8336+
8337+
assert_eq!(
8338+
logical_plan.find_cube_scan().request,
8339+
V1LoadRequestQuery {
8340+
measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string(),]),
8341+
dimensions: Some(vec![
8342+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8343+
]),
8344+
segments: Some(vec![]),
8345+
order: Some(vec![]),
8346+
..Default::default()
8347+
}
8348+
);
8349+
8350+
let logical_plan = convert_select_to_query_plan(
8351+
"SELECT DISTINCT * FROM KibanaSampleDataEcommerce".to_string(),
8352+
DatabaseProtocol::PostgreSQL,
8353+
)
8354+
.await
8355+
.as_logical_plan();
8356+
8357+
println!("logical_plan: {:?}", logical_plan);
8358+
8359+
assert_eq!(
8360+
logical_plan.find_cube_scan().request,
8361+
V1LoadRequestQuery {
8362+
measures: Some(vec![
8363+
"KibanaSampleDataEcommerce.count".to_string(),
8364+
"KibanaSampleDataEcommerce.maxPrice".to_string(),
8365+
"KibanaSampleDataEcommerce.sumPrice".to_string(),
8366+
"KibanaSampleDataEcommerce.minPrice".to_string(),
8367+
"KibanaSampleDataEcommerce.avgPrice".to_string(),
8368+
"KibanaSampleDataEcommerce.countDistinct".to_string(),
8369+
]),
8370+
dimensions: Some(vec![
8371+
"KibanaSampleDataEcommerce.order_date".to_string(),
8372+
"KibanaSampleDataEcommerce.last_mod".to_string(),
8373+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
8374+
"KibanaSampleDataEcommerce.notes".to_string(),
8375+
"KibanaSampleDataEcommerce.taxful_total_price".to_string(),
8376+
"KibanaSampleDataEcommerce.has_subscription".to_string(),
8377+
]),
8378+
segments: Some(vec![]),
8379+
order: Some(vec![]),
8380+
ungrouped: Some(true),
8381+
..Default::default()
8382+
}
8383+
)
8384+
}
8385+
82088386
#[tokio::test]
82098387
async fn test_sort_relations() -> Result<(), CubeError> {
82108388
init_testing_logger();
@@ -15665,8 +15843,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1566515843
"KibanaSampleDataEcommerce.customer_gender".to_string(),
1566615844
]),
1566715845
segments: Some(vec![]),
15668-
order: Some(vec![]),
15669-
ungrouped: Some(true),
15846+
order: Some(vec![vec![
15847+
"KibanaSampleDataEcommerce.customer_gender".to_string(),
15848+
"asc".to_string()
15849+
],]),
1567015850
..Default::default()
1567115851
}
1567215852
)

rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs

Lines changed: 100 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use crate::{
22
compile::rewrite::{
33
agg_fun_expr, aggregate, alias_expr, all_members,
4-
analysis::{ConstantFolding, LogicalPlanData, MemberNamesToExpr, OriginalExpr},
4+
analysis::{ConstantFolding, LogicalPlanData, Member, MemberNamesToExpr, OriginalExpr},
55
binary_expr, cast_expr, change_user_expr, column_expr, cross_join, cube_scan,
66
cube_scan_filters_empty_tail, cube_scan_members, cube_scan_members_empty_tail,
7-
cube_scan_order_empty_tail, dimension_expr, expr_column_name, fun_expr, join, like_expr,
8-
limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr,
7+
cube_scan_order_empty_tail, dimension_expr, distinct, expr_column_name, fun_expr, join,
8+
like_expr, limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr,
99
literal_member, measure_expr, member_pushdown_replacer, member_replacer,
1010
merged_members_replacer, original_expr_name, projection, referenced_columns, rewrite,
1111
rewriter::{CubeEGraph, CubeRewrite, RewriteRules},
@@ -262,6 +262,39 @@ impl RewriteRules for MemberRules {
262262
),
263263
self.push_down_limit("?skip", "?fetch", "?new_skip", "?new_fetch"),
264264
),
265+
transforming_rewrite(
266+
"select-distinct-dimensions",
267+
distinct(cube_scan(
268+
"?alias_to_cube",
269+
"?members",
270+
"?filters",
271+
"?orders",
272+
"CubeScanLimit:None",
273+
"CubeScanOffset:None",
274+
"?split",
275+
"?can_pushdown_join",
276+
"CubeScanWrapped:false",
277+
"?left_ungrouped",
278+
)),
279+
cube_scan(
280+
"?alias_to_cube",
281+
"?members",
282+
"?filters",
283+
"?orders",
284+
"CubeScanLimit:None",
285+
"CubeScanOffset:None",
286+
"?split",
287+
"?can_pushdown_join",
288+
"CubeScanWrapped:false",
289+
"CubeScanUngrouped:false",
290+
),
291+
self.select_distinct_dimensions(
292+
"?alias_to_cube",
293+
"?members",
294+
"?filters",
295+
"?left_ungrouped",
296+
),
297+
),
265298
// MOD function to binary expr
266299
transforming_rewrite_with_root(
267300
"mod-fun-to-binary-expr",
@@ -1478,6 +1511,70 @@ impl MemberRules {
14781511
)
14791512
}
14801513

1514+
fn select_distinct_dimensions(
1515+
&self,
1516+
alias_to_cube_var: &'static str,
1517+
members_var: &'static str,
1518+
filters_var: &'static str,
1519+
left_ungrouped_var: &'static str,
1520+
) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool {
1521+
let alias_to_cube_var = var!(alias_to_cube_var);
1522+
let members_var = var!(members_var);
1523+
let filters_var = var!(filters_var);
1524+
let left_ungrouped_var = var!(left_ungrouped_var);
1525+
let meta_context = self.meta_context.clone();
1526+
1527+
move |egraph, subst| {
1528+
let empty_filters = &egraph[subst[filters_var]]
1529+
.data
1530+
.is_empty_list
1531+
.unwrap_or(true);
1532+
let ungrouped =
1533+
var_iter!(egraph[subst[left_ungrouped_var]], CubeScanUngrouped).any(|v| *v);
1534+
1535+
if !empty_filters && ungrouped {
1536+
return false;
1537+
}
1538+
1539+
let res = match egraph
1540+
.index(subst[members_var])
1541+
.data
1542+
.member_name_to_expr
1543+
.as_ref()
1544+
{
1545+
Some(names_to_expr) => {
1546+
names_to_expr.list.iter().all(|(_, member, _)| {
1547+
// we should allow transform for queries with dimensions only,
1548+
// as it doesn't make sense for measures
1549+
match member {
1550+
Member::Dimension { .. } => true,
1551+
Member::VirtualField { .. } => true,
1552+
Member::LiteralMember { .. } => true,
1553+
_ => false,
1554+
}
1555+
})
1556+
}
1557+
None => {
1558+
// this might be the case of `SELECT DISTINCT *`
1559+
// we need to check that there are only dimensions defined in the referenced cube(s)
1560+
var_iter!(egraph[subst[alias_to_cube_var]], CubeScanAliasToCube)
1561+
.cloned()
1562+
.all(|alias_to_cube| {
1563+
alias_to_cube.iter().all(|(_, cube_name)| {
1564+
if let Some(cube) = meta_context.find_cube_with_name(&cube_name) {
1565+
cube.measures.len() == 0 && cube.segments.len() == 0
1566+
} else {
1567+
false
1568+
}
1569+
})
1570+
})
1571+
}
1572+
};
1573+
1574+
res
1575+
}
1576+
}
1577+
14811578
fn push_down_non_empty_aggregate(
14821579
&self,
14831580
alias_to_cube_var: &'static str,

0 commit comments

Comments
 (0)