Skip to content

Commit a4edfae

Browse files
committed
fix(cubesql): dataRange filter isn't being push down to time dimension in case of other filters are used
Fixes #6312
1 parent 5cd0a17 commit a4edfae

File tree

4 files changed

+260
-4
lines changed

4 files changed

+260
-4
lines changed

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3115,6 +3115,56 @@ ORDER BY \"COUNT(count)\" DESC"
31153115
);
31163116
}
31173117

3118+
#[tokio::test]
3119+
async fn superset_pg_time_filter_with_filter() {
3120+
init_logger();
3121+
3122+
let query_plan = convert_select_to_query_plan(
3123+
"SELECT DATE_TRUNC('week', \"order_date\") AS __timestamp,
3124+
count(count) AS \"COUNT(count)\"
3125+
FROM public.\"KibanaSampleDataEcommerce\"
3126+
WHERE \"customer_gender\" = 'female' AND \"order_date\" >= TO_TIMESTAMP('2021-05-15 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
3127+
AND \"order_date\" < TO_TIMESTAMP('2022-05-15 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
3128+
GROUP BY DATE_TRUNC('week', \"order_date\")
3129+
ORDER BY \"COUNT(count)\" DESC"
3130+
.to_string(),
3131+
DatabaseProtocol::PostgreSQL,
3132+
)
3133+
.await;
3134+
3135+
let logical_plan = query_plan.as_logical_plan();
3136+
assert_eq!(
3137+
logical_plan.find_cube_scan().request,
3138+
V1LoadRequestQuery {
3139+
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
3140+
segments: Some(vec![]),
3141+
dimensions: Some(vec![]),
3142+
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
3143+
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
3144+
granularity: Some("week".to_string()),
3145+
date_range: Some(json!(vec![
3146+
"2021-05-15T00:00:00.000Z".to_string(),
3147+
"2022-05-14T23:59:59.999Z".to_string()
3148+
]))
3149+
}]),
3150+
order: Some(vec![vec![
3151+
"KibanaSampleDataEcommerce.count".to_string(),
3152+
"desc".to_string()
3153+
]]),
3154+
limit: None,
3155+
offset: None,
3156+
filters: Some(vec![V1LoadRequestQueryFilterItem {
3157+
member: Some("KibanaSampleDataEcommerce.customer_gender".to_string()),
3158+
operator: Some("equals".to_string()),
3159+
values: Some(vec!["female".to_string()]),
3160+
or: None,
3161+
and: None
3162+
}]),
3163+
ungrouped: None,
3164+
}
3165+
);
3166+
}
3167+
31183168
#[tokio::test]
31193169
async fn power_bi_dimension_only() {
31203170
init_logger();

rust/cubesql/cubesql/src/compile/rewrite/analysis.rs

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ use crate::{
44
rewrite::{
55
converter::{is_expr_node, node_to_expr},
66
expr_column_name, AliasExprAlias, AllMembersAlias, AllMembersCube, ChangeUserCube,
7-
ColumnExprColumn, DimensionName, LiteralExprValue, LiteralMemberRelation,
8-
LogicalPlanLanguage, MeasureName, SegmentName, TableScanSourceTableName,
9-
TimeDimensionName, VirtualFieldCube, VirtualFieldName,
7+
ColumnExprColumn, DimensionName, FilterMemberMember, FilterMemberOp, LiteralExprValue,
8+
LiteralMemberRelation, LogicalPlanLanguage, MeasureName, SegmentName,
9+
TableScanSourceTableName, TimeDimensionName, VirtualFieldCube, VirtualFieldName,
1010
},
1111
},
1212
transport::V1CubeMetaExt,
@@ -37,6 +37,7 @@ pub struct LogicalPlanData {
3737
pub constant: Option<ConstantFolding>,
3838
pub constant_in_list: Option<Vec<ScalarValue>>,
3939
pub cube_reference: Option<String>,
40+
pub filter_operators: Option<Vec<(String, String)>>,
4041
pub is_empty_list: Option<bool>,
4142
}
4243

@@ -251,6 +252,34 @@ impl LogicalPlanAnalysis {
251252
}
252253
}
253254

255+
fn make_filter_operators(
256+
egraph: &EGraph<LogicalPlanLanguage, Self>,
257+
enode: &LogicalPlanLanguage,
258+
) -> Option<Vec<(String, String)>> {
259+
let filter_operators = |id| egraph.index(id).data.filter_operators.clone();
260+
match enode {
261+
LogicalPlanLanguage::FilterOp(params) => {
262+
let mut map = Vec::new();
263+
for id in params.iter() {
264+
map.extend(filter_operators(*id)?.into_iter());
265+
}
266+
Some(map)
267+
}
268+
LogicalPlanLanguage::FilterMember(params) => {
269+
let member = var_iter!(egraph[params[0]], FilterMemberMember)
270+
.next()
271+
.unwrap()
272+
.to_string();
273+
let op = var_iter!(egraph[params[1]], FilterMemberOp)
274+
.next()
275+
.unwrap()
276+
.to_string();
277+
Some(vec![(member, op)])
278+
}
279+
_ => None,
280+
}
281+
}
282+
254283
fn make_expr_to_alias(
255284
egraph: &EGraph<LogicalPlanLanguage, Self>,
256285
enode: &LogicalPlanLanguage,
@@ -745,6 +774,7 @@ impl Analysis<LogicalPlanLanguage> for LogicalPlanAnalysis {
745774
constant_in_list: Self::make_constant_in_list(egraph, enode),
746775
cube_reference: Self::make_cube_reference(egraph, enode),
747776
is_empty_list: Self::make_is_empty_list(egraph, enode),
777+
filter_operators: Self::make_filter_operators(egraph, enode),
748778
}
749779
}
750780

@@ -758,6 +788,7 @@ impl Analysis<LogicalPlanLanguage> for LogicalPlanAnalysis {
758788
let (constant, b) = self.merge_option_field(a, b, |d| &mut d.constant);
759789
let (cube_reference, b) = self.merge_option_field(a, b, |d| &mut d.cube_reference);
760790
let (is_empty_list, b) = self.merge_option_field(a, b, |d| &mut d.is_empty_list);
791+
let (filter_operators, b) = self.merge_option_field(a, b, |d| &mut d.filter_operators);
761792
let (column_name, _) = self.merge_option_field(a, b, |d| &mut d.column);
762793
original_expr
763794
| member_name_to_expr
@@ -767,6 +798,7 @@ impl Analysis<LogicalPlanLanguage> for LogicalPlanAnalysis {
767798
| constant
768799
| cube_reference
769800
| column_name
801+
| filter_operators
770802
| is_empty_list
771803
}
772804

rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use crate::{
1818
};
1919
use datafusion::{logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner};
2020
use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason};
21+
use itertools::Itertools;
2122
use serde::{Deserialize, Serialize};
2223
use std::{collections::HashSet, env, fs, sync::Arc, time::Duration};
2324

@@ -345,7 +346,14 @@ impl Rewriter {
345346
vec![]
346347
};
347348
let new_root = Id::from(best.as_ref().len() - 1);
348-
log::debug!("Best: {:?}", best);
349+
log::debug!(
350+
"Best: {}",
351+
best.as_ref()
352+
.iter()
353+
.enumerate()
354+
.map(|(i, n)| format!("{}: {:?}", i, n))
355+
.join(",")
356+
);
349357
let converter =
350358
LanguageToLogicalPlanConverter::new(best, cube_context.clone(), auth_context);
351359
Ok((

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2238,6 +2238,82 @@ impl RewriteRules for FilterRules {
22382238
"?date_range_end_op",
22392239
),
22402240
),
2241+
transforming_chain_rewrite(
2242+
"filter-replacer-rotate-filter-and-date-range-left",
2243+
filter_op(
2244+
filter_op_filters(
2245+
"?time_dimension_filter",
2246+
filter_op(filter_op_filters("?left", "?right"), "FilterOpOp:and"),
2247+
),
2248+
"FilterOpOp:and",
2249+
),
2250+
vec![(
2251+
"?time_dimension_filter",
2252+
filter_member(
2253+
"?time_dimension_member",
2254+
"?time_dimension_op",
2255+
"?time_dimension_value",
2256+
),
2257+
)],
2258+
filter_op(
2259+
filter_op_filters(
2260+
"?pull_up_member",
2261+
filter_op(
2262+
filter_op_filters("?left_out", "?right_out"),
2263+
"FilterOpOp:and",
2264+
),
2265+
),
2266+
"FilterOpOp:and",
2267+
),
2268+
self.rotate_filter_and_date_range(
2269+
"?time_dimension_filter",
2270+
"?time_dimension_member",
2271+
"?time_dimension_op",
2272+
"?left",
2273+
"?right",
2274+
"?pull_up_member",
2275+
"?left_out",
2276+
"?right_out",
2277+
),
2278+
),
2279+
transforming_chain_rewrite(
2280+
"filter-replacer-rotate-filter-and-date-range-right",
2281+
filter_op(
2282+
filter_op_filters(
2283+
filter_op(filter_op_filters("?left", "?right"), "FilterOpOp:and"),
2284+
"?time_dimension_filter",
2285+
),
2286+
"FilterOpOp:and",
2287+
),
2288+
vec![(
2289+
"?time_dimension_filter",
2290+
filter_member(
2291+
"?time_dimension_member",
2292+
"?time_dimension_op",
2293+
"?time_dimension_value",
2294+
),
2295+
)],
2296+
filter_op(
2297+
filter_op_filters(
2298+
filter_op(
2299+
filter_op_filters("?left_out", "?right_out"),
2300+
"FilterOpOp:and",
2301+
),
2302+
"?pull_up_member",
2303+
),
2304+
"FilterOpOp:and",
2305+
),
2306+
self.rotate_filter_and_date_range(
2307+
"?time_dimension_filter",
2308+
"?time_dimension_member",
2309+
"?time_dimension_op",
2310+
"?left",
2311+
"?right",
2312+
"?pull_up_member",
2313+
"?left_out",
2314+
"?right_out",
2315+
),
2316+
),
22412317
rewrite(
22422318
"in-date-range-to-time-dimension-pull-up-left",
22432319
cube_scan_filters(
@@ -4188,6 +4264,96 @@ impl FilterRules {
41884264
false
41894265
}
41904266
}
4267+
4268+
fn rotate_filter_and_date_range(
4269+
&self,
4270+
time_dimension_filter_var: &'static str,
4271+
time_dimension_member_var: &'static str,
4272+
time_dimension_op_var: &'static str,
4273+
left_var: &'static str,
4274+
right_var: &'static str,
4275+
pull_up_member_var: &'static str,
4276+
left_out_var: &'static str,
4277+
right_out_var: &'static str,
4278+
) -> impl Fn(&mut EGraph<LogicalPlanLanguage, LogicalPlanAnalysis>, &mut Subst) -> bool {
4279+
let time_dimension_filter_var = var!(time_dimension_filter_var);
4280+
let time_dimension_member_var = var!(time_dimension_member_var);
4281+
let time_dimension_op_var = var!(time_dimension_op_var);
4282+
let left_var = var!(left_var);
4283+
let right_var = var!(right_var);
4284+
let pull_up_member_var = var!(pull_up_member_var);
4285+
let left_out_var = var!(left_out_var);
4286+
let right_out_var = var!(right_out_var);
4287+
move |egraph, subst| {
4288+
for time_dimension_op in
4289+
var_iter!(egraph[subst[time_dimension_op_var]], FilterMemberOp).cloned()
4290+
{
4291+
fn time_dimension_op_score(time_dimension_op: &str) -> i32 {
4292+
match time_dimension_op {
4293+
"beforeDate" => -1,
4294+
"beforeOrOnDate" => -1,
4295+
"afterDate" => 1,
4296+
"afterOrOnDate" => 1,
4297+
_ => 0,
4298+
}
4299+
}
4300+
4301+
let op_score = time_dimension_op_score(&time_dimension_op);
4302+
if op_score == 0 {
4303+
continue;
4304+
}
4305+
for time_dimension_member in
4306+
var_iter!(egraph[subst[time_dimension_member_var]], FilterMemberMember).cloned()
4307+
{
4308+
if let Some(left_filter_operators) =
4309+
egraph[subst[left_var]].data.filter_operators.clone()
4310+
{
4311+
if let Some(right_filter_operators) =
4312+
egraph[subst[right_var]].data.filter_operators.clone()
4313+
{
4314+
let left_filter_operator_score = left_filter_operators
4315+
.iter()
4316+
.filter(|(member, _)| member == &time_dimension_member)
4317+
.map(|(_, op)| time_dimension_op_score(op))
4318+
.sum::<i32>();
4319+
4320+
let right_filter_operator_score = right_filter_operators
4321+
.iter()
4322+
.filter(|(member, _)| member == &time_dimension_member)
4323+
.map(|(_, op)| time_dimension_op_score(op))
4324+
.sum::<i32>();
4325+
4326+
if left_filter_operator_score == op_score * -1
4327+
&& right_filter_operator_score != op_score
4328+
{
4329+
subst.insert(pull_up_member_var, subst[right_var]);
4330+
4331+
subst.insert(left_out_var, subst[left_var]);
4332+
4333+
subst.insert(right_out_var, subst[time_dimension_filter_var]);
4334+
4335+
return true;
4336+
}
4337+
4338+
if right_filter_operator_score == op_score * -1
4339+
&& left_filter_operator_score != op_score
4340+
{
4341+
subst.insert(pull_up_member_var, subst[left_var]);
4342+
4343+
subst.insert(left_out_var, subst[time_dimension_filter_var]);
4344+
4345+
subst.insert(right_out_var, subst[right_var]);
4346+
4347+
return true;
4348+
}
4349+
}
4350+
}
4351+
}
4352+
}
4353+
4354+
false
4355+
}
4356+
}
41914357
}
41924358

41934359
fn filter_unwrap_cast_push_down(

0 commit comments

Comments
 (0)