Skip to content

Commit 92a4b8e

Browse files
authored
feat(cubesql): Add filter flattening rule (#9148)
Allow to flatten filter node into internal WrappedSelect. This should allow to execute plans like Aggregate(Filter(Join(...))) as a single grouped wrapper.
1 parent 554bfc4 commit 92a4b8e

File tree

3 files changed

+256
-5
lines changed

3 files changed

+256
-5
lines changed

rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/filter.rs

Lines changed: 239 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
compile::rewrite::{
3-
cube_scan_wrapper, filter,
3+
cube_scan_wrapper, filter, rewrite,
44
rewriter::{CubeEGraph, CubeRewrite},
55
rules::wrapper::WrapperRules,
66
subquery, transforming_rewrite, wrapped_select, wrapped_select_aggr_expr_empty_tail,
@@ -431,6 +431,244 @@ impl WrapperRules {
431431
)]);
432432
}
433433

434+
pub fn filter_merge_rules(&self, rules: &mut Vec<CubeRewrite>) {
435+
rules.extend(vec![rewrite(
436+
"wrapper-merge-filter-with-inner-wrapped-select",
437+
// Input is not a finished wrapper_pullup_replacer, but WrappedSelect just before pullup
438+
// After pullup replacer would disable push to cube, because any node on top would have WrappedSelect in `from`
439+
// So there would be no CubeScan to push to
440+
// Instead, this rule tries to catch `from` before pulling up, and merge outer Filter into inner WrappedSelect
441+
filter(
442+
"?filter_expr",
443+
cube_scan_wrapper(
444+
wrapped_select(
445+
"WrappedSelectSelectType:Projection",
446+
wrapper_pullup_replacer(
447+
wrapped_select_projection_expr_empty_tail(),
448+
wrapper_replacer_context(
449+
"?alias_to_cube",
450+
"WrapperReplacerContextPushToCube:true",
451+
"WrapperReplacerContextInProjection:false",
452+
"?cube_members",
453+
"?grouped_subqueries",
454+
"?ungrouped_scan",
455+
),
456+
),
457+
wrapper_pullup_replacer(
458+
wrapped_select_subqueries_empty_tail(),
459+
wrapper_replacer_context(
460+
"?alias_to_cube",
461+
"WrapperReplacerContextPushToCube:true",
462+
"WrapperReplacerContextInProjection:false",
463+
"?cube_members",
464+
"?grouped_subqueries",
465+
"?ungrouped_scan",
466+
),
467+
),
468+
wrapper_pullup_replacer(
469+
wrapped_select_group_expr_empty_tail(),
470+
wrapper_replacer_context(
471+
"?alias_to_cube",
472+
"WrapperReplacerContextPushToCube:true",
473+
"WrapperReplacerContextInProjection:false",
474+
"?cube_members",
475+
"?grouped_subqueries",
476+
"?ungrouped_scan",
477+
),
478+
),
479+
wrapper_pullup_replacer(
480+
wrapped_select_aggr_expr_empty_tail(),
481+
wrapper_replacer_context(
482+
"?alias_to_cube",
483+
"WrapperReplacerContextPushToCube:true",
484+
"WrapperReplacerContextInProjection:false",
485+
"?cube_members",
486+
"?grouped_subqueries",
487+
"?ungrouped_scan",
488+
),
489+
),
490+
wrapper_pullup_replacer(
491+
wrapped_select_window_expr_empty_tail(),
492+
wrapper_replacer_context(
493+
"?alias_to_cube",
494+
"WrapperReplacerContextPushToCube:true",
495+
"WrapperReplacerContextInProjection:false",
496+
"?cube_members",
497+
"?grouped_subqueries",
498+
"?ungrouped_scan",
499+
),
500+
),
501+
wrapper_pullup_replacer(
502+
"?inner_from",
503+
wrapper_replacer_context(
504+
"?alias_to_cube",
505+
"WrapperReplacerContextPushToCube:true",
506+
"WrapperReplacerContextInProjection:false",
507+
"?cube_members",
508+
"?grouped_subqueries",
509+
"?ungrouped_scan",
510+
),
511+
),
512+
wrapper_pullup_replacer(
513+
"?inner_joins",
514+
wrapper_replacer_context(
515+
"?alias_to_cube",
516+
"WrapperReplacerContextPushToCube:true",
517+
"WrapperReplacerContextInProjection:false",
518+
"?cube_members",
519+
"?grouped_subqueries",
520+
"?ungrouped_scan",
521+
),
522+
),
523+
wrapper_pullup_replacer(
524+
wrapped_select_filter_expr_empty_tail(),
525+
wrapper_replacer_context(
526+
"?alias_to_cube",
527+
"WrapperReplacerContextPushToCube:true",
528+
"WrapperReplacerContextInProjection:false",
529+
"?cube_members",
530+
"?grouped_subqueries",
531+
"?ungrouped_scan",
532+
),
533+
),
534+
wrapped_select_having_expr_empty_tail(),
535+
// Inner must not have limit and offset, because they are not commutative with aggregation
536+
"WrappedSelectLimit:None",
537+
"WrappedSelectOffset:None",
538+
wrapper_pullup_replacer(
539+
wrapped_select_order_expr_empty_tail(),
540+
wrapper_replacer_context(
541+
"?alias_to_cube",
542+
"WrapperReplacerContextPushToCube:true",
543+
"WrapperReplacerContextInProjection:false",
544+
"?cube_members",
545+
"?grouped_subqueries",
546+
"?ungrouped_scan",
547+
),
548+
),
549+
"WrappedSelectAlias:None",
550+
"WrappedSelectDistinct:false",
551+
"WrappedSelectPushToCube:true",
552+
"WrappedSelectUngroupedScan:true",
553+
),
554+
"CubeScanWrapperFinalized:false",
555+
),
556+
),
557+
cube_scan_wrapper(
558+
wrapped_select(
559+
"WrappedSelectSelectType:Projection",
560+
wrapper_pullup_replacer(
561+
wrapped_select_projection_expr_empty_tail(),
562+
wrapper_replacer_context(
563+
"?alias_to_cube",
564+
"WrapperReplacerContextPushToCube:true",
565+
"WrapperReplacerContextInProjection:false",
566+
"?cube_members",
567+
"?grouped_subqueries",
568+
"WrapperReplacerContextUngroupedScan:true",
569+
),
570+
),
571+
wrapper_pullup_replacer(
572+
wrapped_select_subqueries_empty_tail(),
573+
wrapper_replacer_context(
574+
"?alias_to_cube",
575+
"WrapperReplacerContextPushToCube:true",
576+
"WrapperReplacerContextInProjection:false",
577+
"?cube_members",
578+
"?grouped_subqueries",
579+
"WrapperReplacerContextUngroupedScan:true",
580+
),
581+
),
582+
wrapper_pullup_replacer(
583+
wrapped_select_group_expr_empty_tail(),
584+
wrapper_replacer_context(
585+
"?alias_to_cube",
586+
"WrapperReplacerContextPushToCube:true",
587+
"WrapperReplacerContextInProjection:false",
588+
"?cube_members",
589+
"?grouped_subqueries",
590+
"WrapperReplacerContextUngroupedScan:true",
591+
),
592+
),
593+
wrapper_pullup_replacer(
594+
wrapped_select_aggr_expr_empty_tail(),
595+
wrapper_replacer_context(
596+
"?alias_to_cube",
597+
"WrapperReplacerContextPushToCube:true",
598+
"WrapperReplacerContextInProjection:false",
599+
"?cube_members",
600+
"?grouped_subqueries",
601+
"WrapperReplacerContextUngroupedScan:true",
602+
),
603+
),
604+
wrapper_pullup_replacer(
605+
wrapped_select_window_expr_empty_tail(),
606+
wrapper_replacer_context(
607+
"?alias_to_cube",
608+
"WrapperReplacerContextPushToCube:true",
609+
"WrapperReplacerContextInProjection:false",
610+
"?cube_members",
611+
"?grouped_subqueries",
612+
"WrapperReplacerContextUngroupedScan:true",
613+
),
614+
),
615+
wrapper_pullup_replacer(
616+
"?inner_from",
617+
wrapper_replacer_context(
618+
"?alias_to_cube",
619+
"WrapperReplacerContextPushToCube:true",
620+
"WrapperReplacerContextInProjection:false",
621+
"?cube_members",
622+
"?grouped_subqueries",
623+
"WrapperReplacerContextUngroupedScan:true",
624+
),
625+
),
626+
wrapper_pullup_replacer(
627+
"?inner_joins",
628+
wrapper_replacer_context(
629+
"?alias_to_cube",
630+
"WrapperReplacerContextPushToCube:true",
631+
"WrapperReplacerContextInProjection:false",
632+
"?cube_members",
633+
"?grouped_subqueries",
634+
"WrapperReplacerContextUngroupedScan:true",
635+
),
636+
),
637+
wrapper_pushdown_replacer(
638+
"?filter_expr",
639+
wrapper_replacer_context(
640+
"?alias_to_cube",
641+
"WrapperReplacerContextPushToCube:true",
642+
"WrapperReplacerContextInProjection:false",
643+
"?cube_members",
644+
"?grouped_subqueries",
645+
"WrapperReplacerContextUngroupedScan:true",
646+
),
647+
),
648+
wrapped_select_having_expr_empty_tail(),
649+
"WrappedSelectLimit:None",
650+
"WrappedSelectOffset:None",
651+
wrapper_pullup_replacer(
652+
wrapped_select_order_expr_empty_tail(),
653+
wrapper_replacer_context(
654+
"?alias_to_cube",
655+
"WrapperReplacerContextPushToCube:true",
656+
"WrapperReplacerContextInProjection:false",
657+
"?cube_members",
658+
"?grouped_subqueries",
659+
"WrapperReplacerContextUngroupedScan:true",
660+
),
661+
),
662+
"WrappedSelectAlias:None",
663+
"WrappedSelectDistinct:false",
664+
"WrappedSelectPushToCube:true",
665+
"WrappedSelectUngroupedScan:true",
666+
),
667+
"CubeScanWrapperFinalized:false",
668+
),
669+
)]);
670+
}
671+
434672
fn transform_filter(
435673
&self,
436674
push_to_cube_var: &'static str,

rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ impl RewriteRules for WrapperRules {
6363
self.limit_rules(&mut rules);
6464
self.filter_rules(&mut rules);
6565
self.filter_rules_subquery(&mut rules);
66+
self.filter_merge_rules(&mut rules);
6667
self.subquery_rules(&mut rules);
6768
self.order_rules(&mut rules);
6869
self.window_rules(&mut rules);

rust/cubesql/cubesql/src/compile/test/test_cube_join_grouped.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,8 @@ JOIN
387387
GROUP BY customer_gender
388388
ORDER BY mme_inner__ DESC
389389
LIMIT 20) AS anon_1 ON customer_gender = customer_gender__
390-
-- filters here are not supported without filter flattening in wrapper
391-
-- TODO enable it when ready
392-
-- WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
393-
-- AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
390+
WHERE order_date >= TO_TIMESTAMP('2022-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
391+
AND order_date < TO_TIMESTAMP('2024-09-16 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
394392
GROUP BY DATE_TRUNC('week', order_date)
395393
ORDER BY avgPrice DESC
396394
LIMIT 1000
@@ -424,6 +422,7 @@ LIMIT 1000
424422
let subquery = &wrapped_sql_node.request.subquery_joins.unwrap()[0];
425423

426424
assert!(!subquery.sql.contains("ungrouped"));
425+
// Inner order
427426
let re = Regex::new(
428427
r#""order":\s*\[\s*\[\s*"KibanaSampleDataEcommerce.avgPrice",\s*"desc"\s*\]\s*\]"#,
429428
)
@@ -435,6 +434,19 @@ LIMIT 1000
435434
r#"${KibanaSampleDataEcommerce.customer_gender} = \"anon_1\".\"customer_gender_\""#
436435
));
437436

437+
// Outer filter
438+
assert_eq!(wrapped_sql_node.request.segments.as_ref().unwrap().len(), 1);
439+
assert!(
440+
wrapped_sql_node.request.segments.as_ref().unwrap()[0].contains(
441+
r#"${KibanaSampleDataEcommerce.order_date} >= timestamptz '2022-09-16T00:00:00.000Z'"#
442+
)
443+
);
444+
assert!(
445+
wrapped_sql_node.request.segments.as_ref().unwrap()[0].contains(
446+
r#"${KibanaSampleDataEcommerce.order_date} < timestamptz '2024-09-16T00:00:00.000Z'"#
447+
)
448+
);
449+
438450
// Measure from top aggregation
439451
assert!(wrapped_sql_node
440452
.wrapped_sql

0 commit comments

Comments
 (0)