Skip to content

Commit 858f6d5

Browse files
authored
fix(cubesql): Disallow mixing measure and dimension filters in a single FilterOp (#9486)
Before this filter replacer would push a single filter to CubeScan with all of predicates together. Cube.js does not support mixing measures and dimension in a single filter, only as a separate filters. So, now it stops rewriting if resulting filter would contain both measures and dimensions.
1 parent 854e330 commit 858f6d5

File tree

2 files changed

+160
-4
lines changed

2 files changed

+160
-4
lines changed

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,11 @@ impl RewriteRules for FilterRules {
813813
"FilterOpOp:and",
814814
),
815815
),
816-
rewrite(
816+
// OR filters presents an issue: it must be a single filter with LogicalOp inside, so it can't have both measures and dimensions together
817+
// There's no need to check AND operation for measure-dimension mixup
818+
// Any number of AND's between root and terminal filter will be split to separate filters in CubeScan
819+
// It's enough to stop on first OR, because FilterReplacer goes top-down
820+
transforming_rewrite(
817821
"filter-replacer-or",
818822
filter_replacer(
819823
binary_expr("?left", "OR", "?right"),
@@ -828,6 +832,13 @@ impl RewriteRules for FilterRules {
828832
),
829833
"FilterOpOp:or",
830834
),
835+
self.transform_filter_or(
836+
"?left",
837+
"?right",
838+
"?alias_to_cube",
839+
"?members",
840+
"?filter_aliases",
841+
),
831842
),
832843
// Unwrap lower for case-insensitive operators
833844
transforming_rewrite(
@@ -2792,6 +2803,74 @@ impl FilterRules {
27922803
}
27932804
}
27942805

2806+
fn transform_filter_or(
2807+
&self,
2808+
left_var: &'static str,
2809+
right_var: &'static str,
2810+
alias_to_cube_var: &'static str,
2811+
members_var: &'static str,
2812+
filter_aliases_var: &'static str,
2813+
) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool {
2814+
let left_var = var!(left_var);
2815+
let right_var = var!(right_var);
2816+
let alias_to_cube_var = var!(alias_to_cube_var);
2817+
let members_var = var!(members_var);
2818+
let filter_aliases_var = var!(filter_aliases_var);
2819+
let meta_context = self.meta_context.clone();
2820+
move |egraph, subst| {
2821+
let Some(left_columns) = &egraph[subst[left_var]].data.referenced_expr else {
2822+
return false;
2823+
};
2824+
let Some(right_columns) = &egraph[subst[right_var]].data.referenced_expr else {
2825+
return false;
2826+
};
2827+
let columns = left_columns
2828+
.iter()
2829+
.chain(right_columns.iter())
2830+
.cloned()
2831+
.collect::<Vec<_>>();
2832+
2833+
let aliases_es: Vec<Vec<(String, String)>> =
2834+
var_iter!(egraph[subst[filter_aliases_var]], FilterReplacerAliases)
2835+
.cloned()
2836+
.collect();
2837+
for aliases in aliases_es {
2838+
let mut has_dimensions = false;
2839+
let mut has_measures = false;
2840+
2841+
for column in &columns {
2842+
let Expr::Column(column) = column else {
2843+
// Unexpected non-column in referenced_expr
2844+
return false;
2845+
};
2846+
2847+
let Some((member_name, _, cube)) = Self::filter_member_name_on_columns(
2848+
egraph,
2849+
subst,
2850+
&meta_context,
2851+
alias_to_cube_var,
2852+
&[column.clone()],
2853+
members_var,
2854+
&aliases,
2855+
) else {
2856+
// TODO is this necessary? When predicate in a filter references column that is not-a-member, is it ok to push it?
2857+
return false;
2858+
};
2859+
2860+
has_dimensions |= cube.lookup_dimension_by_member_name(&member_name).is_some();
2861+
has_measures |= cube.lookup_measure_by_member_name(&member_name).is_some();
2862+
}
2863+
if has_dimensions && has_measures {
2864+
// This filter references both measure and dimension in a single OR
2865+
// It is not supported by Cube.js
2866+
return false;
2867+
}
2868+
}
2869+
2870+
true
2871+
}
2872+
}
2873+
27952874
fn unwrap_lower_or_upper(
27962875
&self,
27972876
op_var: &'static str,
@@ -3932,6 +4011,30 @@ impl FilterRules {
39324011
column_var: Var,
39334012
members_var: Var,
39344013
aliases: &Vec<(String, String)>,
4014+
) -> Option<(String, Option<String>, &'meta V1CubeMeta)> {
4015+
let columns: Vec<_> = var_iter!(egraph[subst[column_var]], ColumnExprColumn)
4016+
.cloned()
4017+
.collect();
4018+
4019+
Self::filter_member_name_on_columns(
4020+
egraph,
4021+
subst,
4022+
meta_context,
4023+
alias_to_cube_var,
4024+
&columns,
4025+
members_var,
4026+
aliases,
4027+
)
4028+
}
4029+
4030+
fn filter_member_name_on_columns<'meta>(
4031+
egraph: &mut CubeEGraph,
4032+
subst: &Subst,
4033+
meta_context: &'meta MetaContext,
4034+
alias_to_cube_var: Var,
4035+
columns: &[Column],
4036+
members_var: Var,
4037+
aliases: &Vec<(String, String)>,
39354038
) -> Option<(String, Option<String>, &'meta V1CubeMeta)> {
39364039
let alias_to_cubes: Vec<_> =
39374040
var_iter!(egraph[subst[alias_to_cube_var]], FilterReplacerAliasToCube)
@@ -3940,9 +4043,6 @@ impl FilterRules {
39404043
if alias_to_cubes.is_empty() {
39414044
return None;
39424045
}
3943-
let columns: Vec<_> = var_iter!(egraph[subst[column_var]], ColumnExprColumn)
3944-
.cloned()
3945-
.collect();
39464046
for alias_to_cube in alias_to_cubes {
39474047
for column in columns.iter() {
39484048
let alias_name = expr_column_name(&Expr::Column(column.clone()), &None);

rust/cubesql/cubesql/src/compile/test/test_filters.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,59 @@ SELECT dim_str0 FROM MultiTypeCube WHERE (dim_str1 IS NULL OR dim_str1 IN (NULL)
133133
.sql
134134
.contains(r#"\"sql\":\"((${MultiTypeCube.dim_str1} IS NULL) OR (${MultiTypeCube.dim_str1} IN (NULL) AND FALSE))\""#));
135135
}
136+
137+
/// Single filter in CubeScan does not support both measuser in dimensions, so it should not get pushed to CubeScan
138+
#[tokio::test]
139+
async fn test_mixed_filters() {
140+
if !Rewriter::sql_push_down_enabled() {
141+
return;
142+
}
143+
init_testing_logger();
144+
145+
let query_plan = convert_select_to_query_plan(
146+
// language=PostgreSQL
147+
r#"
148+
SELECT
149+
dim_str0,
150+
avgPrice
151+
FROM (
152+
SELECT
153+
dim_str0,
154+
AVG(avgPrice) AS avgPrice
155+
FROM
156+
MultiTypeCube
157+
GROUP BY 1
158+
) t
159+
WHERE
160+
avgPrice > 1
161+
OR (
162+
avgPrice = 1
163+
AND
164+
dim_str0 = 'completed'
165+
)
166+
;
167+
"#
168+
.to_string(),
169+
DatabaseProtocol::PostgreSQL,
170+
)
171+
.await;
172+
173+
let physical_plan = query_plan.as_physical_plan().await.unwrap();
174+
println!(
175+
"Physical plan: {}",
176+
displayable(physical_plan.as_ref()).indent()
177+
);
178+
179+
let logical_plan = query_plan.as_logical_plan();
180+
assert_eq!(
181+
logical_plan.find_cube_scan().request,
182+
V1LoadRequestQuery {
183+
measures: Some(vec!["MultiTypeCube.avgPrice".to_string()]),
184+
dimensions: Some(vec!["MultiTypeCube.dim_str0".to_string()]),
185+
segments: Some(vec![]),
186+
order: Some(vec![]),
187+
filters: None,
188+
..Default::default()
189+
}
190+
);
191+
}

0 commit comments

Comments
 (0)