From 54c989457b0d987b2581b6736c732be1838be7b5 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Mon, 24 Feb 2025 17:02:43 +0200 Subject: [PATCH 1/4] refactor(cubesql): Remove unnecessary Vec+Box for rules --- .../cubesql/src/compile/rewrite/rewriter.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs index a3382be31750d..01efbf4ccab65 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs @@ -473,20 +473,16 @@ impl Rewriter { eval_stable_functions: bool, ) -> Vec { let sql_push_down = Self::sql_push_down_enabled(); - let rules: Vec> = vec![ - Box::new(MemberRules::new( - meta_context.clone(), - config_obj.clone(), - sql_push_down, - )), - Box::new(FilterRules::new( + let rules: &[&dyn RewriteRules] = &[ + &MemberRules::new(meta_context.clone(), config_obj.clone(), sql_push_down), + &FilterRules::new( meta_context.clone(), config_obj.clone(), eval_stable_functions, - )), - Box::new(DateRules::new(config_obj.clone())), - Box::new(OrderRules::new()), - Box::new(CommonRules::new(config_obj.clone())), + ), + &DateRules::new(config_obj.clone()), + &OrderRules::new(), + &CommonRules::new(config_obj.clone()), ]; let mut rewrites = Vec::new(); for r in rules { From e9ea50f6bcfc0e00d88d7f2ad0df09a3e2b68831 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 4 Mar 2025 15:58:07 +0200 Subject: [PATCH 2/4] test(cubesql): Add NOW function template --- rust/cubesql/cubesql/src/compile/test/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/cubesql/cubesql/src/compile/test/mod.rs b/rust/cubesql/cubesql/src/compile/test/mod.rs index 44ea0b0db3f8e..ccca09d0d9271 100644 --- a/rust/cubesql/cubesql/src/compile/test/mod.rs +++ b/rust/cubesql/cubesql/src/compile/test/mod.rs @@ -581,6 +581,7 @@ pub fn sql_generator( ("functions/LEAST".to_string(), "LEAST({{ args_concat }})".to_string()), ("functions/DATEDIFF".to_string(), "DATEDIFF({{ date_part }}, {{ args[1] }}, {{ args[2] }})".to_string()), ("functions/CURRENTDATE".to_string(), "CURRENT_DATE({{ args_concat }})".to_string()), + ("functions/NOW".to_string(), "NOW({{ args_concat }})".to_string()), ("functions/DATE_ADD".to_string(), "DATE_ADD({{ args_concat }})".to_string()), ("functions/CONCAT".to_string(), "CONCAT({{ args_concat }})".to_string()), ("functions/DATE".to_string(), "DATE({{ args_concat }})".to_string()), From 1cdb9efa235854b9174f83b895982b45a241b706 Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Tue, 4 Mar 2025 16:15:38 +0200 Subject: [PATCH 3/4] fix(cubesql): Bump default rewrite iteration limit to 500 --- rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs index 01efbf4ccab65..488ca241e053d 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs @@ -208,7 +208,7 @@ impl Rewriter { .with_iter_limit( env::var("CUBESQL_REWRITE_MAX_ITERATIONS") .map(|v| v.parse::().unwrap()) - .unwrap_or(300), + .unwrap_or(500), ) .with_node_limit( env::var("CUBESQL_REWRITE_MAX_NODES") From 9d2daaffb2b4731a927516b3aae293f0e9251d0f Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Thu, 20 Feb 2025 20:00:49 +0200 Subject: [PATCH 4/4] fix(cubesql): Use pushdown-pullup scheme for FilterSimplifyReplacer This should avoid unexpected unifications of simplified expression in unrelated context. For example, same expression can be present in filter and projection, but due to over-unification in filters it can receive different representation in projection, and break aliases later, during extraction --- rust/cubesql/cubesql/src/compile/mod.rs | 17 +- .../cubesql/src/compile/rewrite/cost.rs | 2 + .../cubesql/src/compile/rewrite/mod.rs | 13 +- .../src/compile/rewrite/rules/filters.rs | 413 +++++++++++++----- .../cubesql/src/compile/test/test_wrapper.rs | 54 ++- 5 files changed, 373 insertions(+), 126 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 86a0b76db789f..b0ec639e65420 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -12878,7 +12878,7 @@ ORDER BY "source"."str0" ASC } init_testing_logger(); - let logical_plan = convert_select_to_query_plan( + let query_plan = convert_select_to_query_plan( r#" SELECT CAST("public"."KibanaSampleDataEcommerce"."order_date" AS DATE) AS "order_date", @@ -12899,11 +12899,16 @@ ORDER BY "source"."str0" ASC .to_string(), DatabaseProtocol::PostgreSQL, ) - .await - .as_logical_plan(); + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let logical_plan = query_plan.as_logical_plan(); - let end_date = chrono::Utc::now().date_naive(); - let start_date = end_date - chrono::Duration::days(30); assert_eq!( logical_plan.find_cube_scan_wrapped_sql().request, V1LoadRequestQuery { @@ -12930,7 +12935,7 @@ ORDER BY "source"."str0" ASC "cube_name": "KibanaSampleDataEcommerce", "alias": "kibanasampledata", "cube_params": ["KibanaSampleDataEcommerce"], - "expr": format!("(((${{KibanaSampleDataEcommerce.order_date}} >= DATE('{start_date}')) AND (${{KibanaSampleDataEcommerce.order_date}} < DATE('{end_date}'))) AND (((${{KibanaSampleDataEcommerce.notes}} = $0$) OR (${{KibanaSampleDataEcommerce.notes}} = $1$)) OR (${{KibanaSampleDataEcommerce.notes}} = $2$)))"), + "expr": format!("(((${{KibanaSampleDataEcommerce.order_date}} >= CAST((NOW() + INTERVAL '-30 DAY') AS DATE)) AND (${{KibanaSampleDataEcommerce.order_date}} < CAST(NOW() AS DATE))) AND (((${{KibanaSampleDataEcommerce.notes}} = $0$) OR (${{KibanaSampleDataEcommerce.notes}} = $1$)) OR (${{KibanaSampleDataEcommerce.notes}} = $2$)))"), "grouping_set": null, }).to_string(), ]), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs index ab22941f78b59..2a1732849901f 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs @@ -104,6 +104,8 @@ impl BestCubePlan { LogicalPlanLanguage::OrderReplacer(_) => 1, LogicalPlanLanguage::MemberReplacer(_) => 1, LogicalPlanLanguage::FilterReplacer(_) => 1, + LogicalPlanLanguage::FilterSimplifyPushDownReplacer(_) => 1, + LogicalPlanLanguage::FilterSimplifyPullUpReplacer(_) => 1, LogicalPlanLanguage::TimeDimensionDateRangeReplacer(_) => 1, LogicalPlanLanguage::InnerAggregateSplitReplacer(_) => 1, LogicalPlanLanguage::OuterProjectionSplitReplacer(_) => 1, diff --git a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs index 57bbfa73b1f00..1ff66f33b939b 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/mod.rs @@ -405,7 +405,10 @@ crate::plan_to_language! { members: Vec, aliases: Vec<(String, String)>, }, - FilterSimplifyReplacer { + FilterSimplifyPushDownReplacer { + filters: Vec, + }, + FilterSimplifyPullUpReplacer { filters: Vec, }, OrderReplacer { @@ -1901,8 +1904,12 @@ fn filter_replacer( ) } -fn filter_simplify_replacer(members: impl Display) -> String { - format!("(FilterSimplifyReplacer {})", members) +fn filter_simplify_push_down_replacer(members: impl Display) -> String { + format!("(FilterSimplifyPushDownReplacer {})", members) +} + +fn filter_simplify_pull_up_replacer(members: impl Display) -> String { + format!("(FilterSimplifyPullUpReplacer {})", members) } fn inner_aggregate_split_replacer(members: impl Display, alias_to_cube: impl Display) -> String { diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs index 115a2890fbc0e..b54d81c94f711 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs @@ -6,10 +6,11 @@ use crate::{ between_expr, binary_expr, case_expr, case_expr_var_arg, cast_expr, change_user_member, column_expr, cube_scan, cube_scan_filters, cube_scan_filters_empty_tail, cube_scan_members, dimension_expr, expr_column_name, filter, filter_member, filter_op, filter_op_filters, - filter_op_filters_empty_tail, filter_replacer, filter_simplify_replacer, fun_expr, - fun_expr_args_legacy, fun_expr_var_arg, inlist_expr, inlist_expr_list, is_not_null_expr, - is_null_expr, like_expr, limit, list_rewrite, literal_bool, literal_expr, literal_int, - literal_string, measure_expr, negative_expr, not_expr, projection, rewrite, + filter_op_filters_empty_tail, filter_replacer, filter_simplify_pull_up_replacer, + filter_simplify_push_down_replacer, fun_expr, fun_expr_args_legacy, fun_expr_var_arg, + inlist_expr, inlist_expr_list, is_not_null_expr, is_null_expr, like_expr, limit, + list_rewrite, literal_bool, literal_expr, literal_int, literal_string, measure_expr, + negative_expr, not_expr, projection, rewrite, rewriter::{CubeEGraph, CubeRewrite, RewriteRules}, scalar_fun_expr_args_empty_tail, segment_member, time_dimension_date_range_replacer, time_dimension_expr, transform_original_expr_to_alias, transforming_chain_rewrite, @@ -24,6 +25,7 @@ use crate::{ TimeDimensionGranularity, TimeDimensionName, }, config::ConfigObj, + copy_value, transport::{ext::V1CubeMetaExt, MemberType, MetaContext}, var, var_iter, }; @@ -69,7 +71,7 @@ impl RewriteRules for FilterRules { fn rewrite_rules(&self) -> Vec { let mut rules = vec![ transforming_rewrite( - "push-down-filter", + "push-down-filter-simplify", filter( "?expr", cube_scan( @@ -88,15 +90,7 @@ impl RewriteRules for FilterRules { cube_scan( "?alias_to_cube", "?members", - cube_scan_filters( - "?filters", - filter_replacer( - filter_simplify_replacer("?expr"), - "?filter_alias_to_cube", - "?members", - "?filter_aliases", - ), - ), + cube_scan_filters("?filters", filter_simplify_push_down_replacer("?expr")), "?order", "?limit", "?offset", @@ -105,12 +99,7 @@ impl RewriteRules for FilterRules { "?wrapped", "?ungrouped", ), - self.push_down_filter( - "?alias_to_cube", - "?expr", - "?filter_alias_to_cube", - "?filter_aliases", - ), + self.push_down_filter_simplify("?expr"), ), // Transform Filter: Boolean(False) transforming_rewrite( @@ -272,6 +261,42 @@ impl RewriteRules for FilterRules { ), ), ), + transforming_rewrite( + "push-down-filter-pickup-simplified", + cube_scan( + "?alias_to_cube", + "?members", + cube_scan_filters("?filters", filter_simplify_pull_up_replacer("?filter")), + "?order", + "?limit", + "?offset", + "?split", + "?can_pushdown_join", + "?wrapped", + "?ungrouped", + ), + cube_scan( + "?alias_to_cube", + "?members", + cube_scan_filters( + "?filters", + filter_replacer( + "?filter", + "?filter_alias_to_cube", + "?members", + "?filter_aliases", + ), + ), + "?order", + "?limit", + "?offset", + "?split", + "?can_pushdown_join", + "?wrapped", + "?ungrouped", + ), + self.push_down_filter("?alias_to_cube", "?filter_alias_to_cube", "?filter_aliases"), + ), // Transform Filter: Boolean(True) same as TRUE = TRUE, which is useless transforming_rewrite( "filter-truncate-true", @@ -1867,36 +1892,55 @@ impl RewriteRules for FilterRules { // Simplify rules transforming_rewrite( "filter-simplify-cast-unwrap", - filter_simplify_replacer(cast_expr("?expr", "?data_type")), - // TODO alias to make it equivalent transformation - filter_simplify_replacer("?expr"), + filter_simplify_push_down_replacer(cast_expr("?expr", "?data_type")), + filter_simplify_push_down_replacer("?expr"), self.transform_filter_cast_unwrap("?expr", "?data_type", false), ), transforming_rewrite( "filter-simplify-cast-push-down", - filter_simplify_replacer(cast_expr("?expr", "?data_type")), - cast_expr(filter_simplify_replacer("?expr"), "?data_type"), + filter_simplify_push_down_replacer(cast_expr("?expr", "?data_type")), + cast_expr(filter_simplify_push_down_replacer("?expr"), "?data_type"), self.transform_filter_cast_unwrap("?expr", "?data_type", true), ), + rewrite( + "filter-simplify-cast-pull-up", + cast_expr(filter_simplify_pull_up_replacer("?expr"), "?data_type"), + filter_simplify_pull_up_replacer(cast_expr("?expr", "?data_type")), + ), // Alias + // TODO remove alias completely during simplification, they should be irrelevant in filters rewrite( "filter-simplify-alias-push-down", - filter_simplify_replacer(alias_expr("?expr", "?alias")), - alias_expr(filter_simplify_replacer("?expr"), "?alias"), + filter_simplify_push_down_replacer(alias_expr("?expr", "?alias")), + alias_expr(filter_simplify_push_down_replacer("?expr"), "?alias"), + ), + rewrite( + "filter-simplify-alias-pull-up", + alias_expr(filter_simplify_pull_up_replacer("?expr"), "?alias"), + filter_simplify_pull_up_replacer(alias_expr("?expr", "?alias")), ), // Binary expr rewrite( "filter-simplify-binary-push-down", - filter_simplify_replacer(binary_expr("?left", "?op", "?right")), + filter_simplify_push_down_replacer(binary_expr("?left", "?op", "?right")), binary_expr( - filter_simplify_replacer("?left"), + filter_simplify_push_down_replacer("?left"), "?op", - filter_simplify_replacer("?right"), + filter_simplify_push_down_replacer("?right"), ), ), + rewrite( + "filter-simplify-binary-pull-up", + binary_expr( + filter_simplify_pull_up_replacer("?left"), + "?op", + filter_simplify_pull_up_replacer("?right"), + ), + filter_simplify_pull_up_replacer(binary_expr("?left", "?op", "?right")), + ), rewrite( "filter-simplify-like-push-down", - filter_simplify_replacer(like_expr( + filter_simplify_push_down_replacer(like_expr( "?like_type", "?negated", "?expr", @@ -1906,98 +1950,194 @@ impl RewriteRules for FilterRules { like_expr( "?like_type", "?negated", - filter_simplify_replacer("?expr"), - filter_simplify_replacer("?pattern"), + filter_simplify_push_down_replacer("?expr"), + filter_simplify_push_down_replacer("?pattern"), + "?escape_char", + ), + ), + rewrite( + "filter-simplify-like-pull-up", + like_expr( + "?like_type", + "?negated", + filter_simplify_pull_up_replacer("?expr"), + filter_simplify_pull_up_replacer("?pattern"), "?escape_char", ), + filter_simplify_pull_up_replacer(like_expr( + "?like_type", + "?negated", + "?expr", + "?pattern", + "?escape_char", + )), ), rewrite( "filter-simplify-not-push-down", - filter_simplify_replacer(not_expr("?expr")), - not_expr(filter_simplify_replacer("?expr")), + filter_simplify_push_down_replacer(not_expr("?expr")), + not_expr(filter_simplify_push_down_replacer("?expr")), + ), + rewrite( + "filter-simplify-not-pull-up", + not_expr(filter_simplify_pull_up_replacer("?expr")), + filter_simplify_pull_up_replacer(not_expr("?expr")), ), rewrite( "filter-simplify-inlist-push-down", - filter_simplify_replacer(inlist_expr("?expr", "?list", "?negated")), + filter_simplify_push_down_replacer(inlist_expr("?expr", "?list", "?negated")), // TODO unwrap list as well - inlist_expr(filter_simplify_replacer("?expr"), "?list", "?negated"), + inlist_expr( + filter_simplify_push_down_replacer("?expr"), + "?list", + "?negated", + ), + ), + rewrite( + "filter-simplify-inlist-pull-up", + // TODO unwrap list as well + inlist_expr( + filter_simplify_pull_up_replacer("?expr"), + "?list", + "?negated", + ), + filter_simplify_pull_up_replacer(inlist_expr("?expr", "?list", "?negated")), ), rewrite( "filter-simplify-is-null-push-down", - filter_simplify_replacer(is_null_expr("?expr")), - is_null_expr(filter_simplify_replacer("?expr")), + filter_simplify_push_down_replacer(is_null_expr("?expr")), + is_null_expr(filter_simplify_push_down_replacer("?expr")), + ), + rewrite( + "filter-simplify-is-null-pull-up", + is_null_expr(filter_simplify_pull_up_replacer("?expr")), + filter_simplify_pull_up_replacer(is_null_expr("?expr")), ), rewrite( "filter-simplify-is-not-null-push-down", - filter_simplify_replacer(is_not_null_expr("?expr")), - is_not_null_expr(filter_simplify_replacer("?expr")), + filter_simplify_push_down_replacer(is_not_null_expr("?expr")), + is_not_null_expr(filter_simplify_push_down_replacer("?expr")), ), rewrite( - "filter-simplify-literal-push-down", - filter_simplify_replacer(literal_expr("?literal")), - literal_expr("?literal"), + "filter-simplify-is-not-null-pull-up", + is_not_null_expr(filter_simplify_pull_up_replacer("?expr")), + filter_simplify_pull_up_replacer(is_not_null_expr("?expr")), ), rewrite( - "filter-simplify-column-push-down", - filter_simplify_replacer(column_expr("?column")), - column_expr("?column"), + "filter-simplify-literal", + filter_simplify_push_down_replacer(literal_expr("?literal")), + filter_simplify_pull_up_replacer(literal_expr("?literal")), + ), + rewrite( + "filter-simplify-column", + filter_simplify_push_down_replacer(column_expr("?column")), + filter_simplify_pull_up_replacer(column_expr("?column")), ), // scalar rewrite( "filter-simplify-scalar-fun-push-down", - filter_simplify_replacer(fun_expr_var_arg("?fun", "?args")), - fun_expr_var_arg("?fun", filter_simplify_replacer("?args")), + filter_simplify_push_down_replacer(fun_expr_var_arg("?fun", "?args")), + fun_expr_var_arg("?fun", filter_simplify_push_down_replacer("?args")), + ), + rewrite( + "filter-simplify-scalar-fun-pull-up", + fun_expr_var_arg("?fun", filter_simplify_pull_up_replacer("?args")), + filter_simplify_pull_up_replacer(fun_expr_var_arg("?fun", "?args")), ), rewrite( - "filter-simplify-scalar-args-empty-tail-push-down", - filter_simplify_replacer(scalar_fun_expr_args_empty_tail()), - scalar_fun_expr_args_empty_tail(), + "filter-simplify-scalar-args-empty-tail", + filter_simplify_push_down_replacer(scalar_fun_expr_args_empty_tail()), + filter_simplify_pull_up_replacer(scalar_fun_expr_args_empty_tail()), ), // udf rewrite( "filter-simplify-udf-fun-push-down", - filter_simplify_replacer(udf_expr_var_arg("?fun", "?args")), - udf_expr_var_arg("?fun", filter_simplify_replacer("?args")), + filter_simplify_push_down_replacer(udf_expr_var_arg("?fun", "?args")), + udf_expr_var_arg("?fun", filter_simplify_push_down_replacer("?args")), + ), + rewrite( + "filter-simplify-udf-fun-pull-up", + udf_expr_var_arg("?fun", filter_simplify_pull_up_replacer("?args")), + filter_simplify_pull_up_replacer(udf_expr_var_arg("?fun", "?args")), ), rewrite( "filter-simplify-udf-args-push-down", - filter_simplify_replacer(udf_fun_expr_args("?left", "?right")), + filter_simplify_push_down_replacer(udf_fun_expr_args("?left", "?right")), + udf_fun_expr_args( + filter_simplify_push_down_replacer("?left"), + filter_simplify_push_down_replacer("?right"), + ), + ), + rewrite( + "filter-simplify-udf-args-pull-up", udf_fun_expr_args( - filter_simplify_replacer("?left"), - filter_simplify_replacer("?right"), + filter_simplify_pull_up_replacer("?left"), + filter_simplify_pull_up_replacer("?right"), ), + filter_simplify_pull_up_replacer(udf_fun_expr_args("?left", "?right")), ), rewrite( - "filter-simplify-udf-args-empty-tail-push-down", - filter_simplify_replacer(udf_fun_expr_args_empty_tail()), - udf_fun_expr_args_empty_tail(), + "filter-simplify-udf-args-empty-tail", + filter_simplify_push_down_replacer(udf_fun_expr_args_empty_tail()), + filter_simplify_pull_up_replacer(udf_fun_expr_args_empty_tail()), ), // case rewrite( "filter-simplify-case-push-down", - filter_simplify_replacer(case_expr_var_arg("?expr", "?when_then", "?else")), + filter_simplify_push_down_replacer(case_expr_var_arg( + "?expr", + "?when_then", + "?else", + )), + case_expr_var_arg( + filter_simplify_push_down_replacer("?expr"), + filter_simplify_push_down_replacer("?when_then"), + filter_simplify_push_down_replacer("?else"), + ), + ), + rewrite( + "filter-simplify-case-pull-up", case_expr_var_arg( - filter_simplify_replacer("?expr"), - filter_simplify_replacer("?when_then"), - filter_simplify_replacer("?else"), + filter_simplify_pull_up_replacer("?expr"), + filter_simplify_pull_up_replacer("?when_then"), + filter_simplify_pull_up_replacer("?else"), ), + filter_simplify_pull_up_replacer(case_expr_var_arg("?expr", "?when_then", "?else")), ), rewrite( "filter-simplify-between-push-down", - filter_simplify_replacer(between_expr("?expr", "?negated", "?low", "?high")), + filter_simplify_push_down_replacer(between_expr( + "?expr", "?negated", "?low", "?high", + )), between_expr( + // TODO why expr is not simplified? "?expr", "?negated", - filter_simplify_replacer("?low"), - filter_simplify_replacer("?high"), + filter_simplify_push_down_replacer("?low"), + filter_simplify_push_down_replacer("?high"), ), ), + rewrite( + "filter-simplify-between-pull-up", + between_expr( + // TODO why expr is not simplified? + "?expr", + "?negated", + filter_simplify_pull_up_replacer("?low"), + filter_simplify_pull_up_replacer("?high"), + ), + filter_simplify_pull_up_replacer(between_expr( + "?expr", "?negated", "?low", "?high", + )), + ), filter_simplify_push_down("CaseExprExpr"), - filter_simplify_push_down_tail("CaseExprExpr"), + filter_simplify_pull_up("CaseExprExpr"), + filter_simplify_tail("CaseExprExpr"), filter_simplify_push_down("CaseExprWhenThenExpr"), - filter_simplify_push_down_tail("CaseExprWhenThenExpr"), + filter_simplify_pull_up("CaseExprWhenThenExpr"), + filter_simplify_tail("CaseExprWhenThenExpr"), filter_simplify_push_down("CaseExprElseExpr"), - filter_simplify_push_down_tail("CaseExprElseExpr"), + filter_simplify_pull_up("CaseExprElseExpr"), + filter_simplify_tail("CaseExprElseExpr"), rewrite( "filter-flatten-upper-and-left", cube_scan_filters( @@ -2450,45 +2590,74 @@ impl RewriteRules for FilterRules { "filter-simplify-scalar-args-push-down", ListType::ScalarFunctionExprArgs, ListPattern { - pattern: filter_simplify_replacer("?args"), + pattern: filter_simplify_push_down_replacer("?args"), list_var: "?args".to_string(), elem: "?arg".to_string(), }, ListPattern { pattern: "?new_args".to_string(), list_var: "?new_args".to_string(), - elem: filter_simplify_replacer("?arg"), + elem: filter_simplify_push_down_replacer("?arg"), + }, + )); + rules.push(list_rewrite( + "filter-simplify-scalar-args-pull-up", + ListType::ScalarFunctionExprArgs, + ListPattern { + pattern: "?args".to_string(), + list_var: "?args".to_string(), + elem: filter_simplify_pull_up_replacer("?arg"), + }, + ListPattern { + pattern: filter_simplify_pull_up_replacer("?new_args"), + list_var: "?new_args".to_string(), + elem: "?arg".to_string(), }, )); } else { rules.push(rewrite( "filter-simplify-scalar-args-push-down", - filter_simplify_replacer(fun_expr_args_legacy("?left", "?right")), + filter_simplify_push_down_replacer(fun_expr_args_legacy("?left", "?right")), fun_expr_args_legacy( - filter_simplify_replacer("?left"), - filter_simplify_replacer("?right"), + filter_simplify_push_down_replacer("?left"), + filter_simplify_push_down_replacer("?right"), ), )); + rules.push(rewrite( + "filter-simplify-scalar-args-pull-up", + fun_expr_args_legacy( + filter_simplify_pull_up_replacer("?left"), + filter_simplify_pull_up_replacer("?right"), + ), + filter_simplify_pull_up_replacer(fun_expr_args_legacy("?left", "?right")), + )); } if self.eval_stable_functions { rules.extend(vec![ rewrite( "filter-simplify-now", - filter_simplify_replacer(self.fun_expr("Now", Vec::::new())), - // TODO alias to make it equivalent transformation - udf_expr("eval_now", Vec::::new()), + filter_simplify_push_down_replacer(self.fun_expr("Now", Vec::::new())), + filter_simplify_pull_up_replacer(udf_expr("eval_now", Vec::::new())), ), rewrite( "filter-simplify-utc-timestamp", - filter_simplify_replacer(self.fun_expr("UtcTimestamp", Vec::::new())), - // TODO alias to make it equivalent transformation - udf_expr("eval_utc_timestamp", Vec::::new()), + filter_simplify_push_down_replacer( + self.fun_expr("UtcTimestamp", Vec::::new()), + ), + filter_simplify_pull_up_replacer(udf_expr( + "eval_utc_timestamp", + Vec::::new(), + )), ), rewrite( "filter-simplify-current-date", - filter_simplify_replacer(self.fun_expr("CurrentDate", Vec::::new())), - // TODO alias to make it equivalent transformation - udf_expr("eval_current_date", Vec::::new()), + filter_simplify_push_down_replacer( + self.fun_expr("CurrentDate", Vec::::new()), + ), + filter_simplify_pull_up_replacer(udf_expr( + "eval_current_date", + Vec::::new(), + )), ), ]); } @@ -2513,40 +2682,45 @@ impl FilterRules { fun_expr(fun_name, args, self.config_obj.push_down_pull_up_split()) } + fn push_down_filter_simplify( + &self, + exp_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let exp_var = var!(exp_var); + move |egraph, subst| { + // TODO check referenced_expr + egraph.index(subst[exp_var]).data.referenced_expr.is_some() + } + } + fn push_down_filter( &self, alias_to_cube_var: &'static str, - exp_var: &'static str, filter_alias_to_cube_var: &'static str, filter_aliases_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let alias_to_cube_var = var!(alias_to_cube_var); - let exp_var = var!(exp_var); let filter_aliases_var = var!(filter_aliases_var); let filter_alias_to_cube_var = var!(filter_alias_to_cube_var); move |egraph, subst| { - for alias_to_cube in - var_iter!(egraph[subst[alias_to_cube_var]], CubeScanAliasToCube).cloned() - { - if let Some(_referenced_expr) = &egraph.index(subst[exp_var]).data.referenced_expr { - // TODO check referenced_expr - subst.insert( - filter_alias_to_cube_var, - egraph.add(LogicalPlanLanguage::FilterReplacerAliasToCube( - FilterReplacerAliasToCube(alias_to_cube), - )), - ); - - let filter_replacer_aliases = egraph.add( - LogicalPlanLanguage::FilterReplacerAliases(FilterReplacerAliases(vec![])), - ); - subst.insert(filter_aliases_var, filter_replacer_aliases); - - return true; - } + if !copy_value!( + egraph, + subst, + Vec<(String, String)>, + alias_to_cube_var, + CubeScanAliasToCube, + filter_alias_to_cube_var, + FilterReplacerAliasToCube + ) { + return false; } - false + let filter_replacer_aliases = egraph.add(LogicalPlanLanguage::FilterReplacerAliases( + FilterReplacerAliases(vec![]), + )); + subst.insert(filter_aliases_var, filter_replacer_aliases); + + true } } @@ -5015,21 +5189,34 @@ impl FilterRules { fn filter_simplify_push_down(node_type: impl Display) -> CubeRewrite { rewrite( &format!("filter-simplify-{}-push-down", node_type), - filter_simplify_replacer(format!("({} ?left ?right)", node_type)), + filter_simplify_push_down_replacer(format!("({} ?left ?right)", node_type)), + format!( + "({} {} {})", + node_type, + filter_simplify_push_down_replacer("?left"), + filter_simplify_push_down_replacer("?right") + ), + ) +} + +fn filter_simplify_pull_up(node_type: impl Display) -> CubeRewrite { + rewrite( + &format!("filter-simplify-{}-pull-up", node_type), format!( "({} {} {})", node_type, - filter_simplify_replacer("?left"), - filter_simplify_replacer("?right") + filter_simplify_pull_up_replacer("?left"), + filter_simplify_pull_up_replacer("?right") ), + filter_simplify_pull_up_replacer(format!("({} ?left ?right)", node_type)), ) } -fn filter_simplify_push_down_tail(node_type: impl Display) -> CubeRewrite { +fn filter_simplify_tail(node_type: impl Display) -> CubeRewrite { rewrite( - &format!("filter-simplify-{}-empty-tail-push-down", node_type), - filter_simplify_replacer(node_type.to_string()), - node_type.to_string(), + &format!("filter-simplify-{}-empty-tail", node_type), + filter_simplify_push_down_replacer(node_type.to_string()), + filter_simplify_pull_up_replacer(node_type.to_string()), ) } diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index 489062c800272..47493355370b8 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -572,7 +572,7 @@ WHERE )); } -/// Using NOW() in wrapper should render proper timestamptz in SQL +/// Using NOW() in wrapper should render NOW() in SQL #[tokio::test] async fn test_wrapper_now() { if !Rewriter::sql_push_down_enabled() { @@ -611,10 +611,10 @@ GROUP BY .find_cube_scan_wrapped_sql() .wrapped_sql .sql - .contains("${KibanaSampleDataEcommerce.order_date} >= timestamptz")); + .contains("${KibanaSampleDataEcommerce.order_date} >= NOW()")); } -/// Using NOW() in ungrouped wrapper should render proper timestamptz in SQL +/// Using NOW() in ungrouped wrapper should render NOW() in SQL #[tokio::test] async fn test_wrapper_now_ungrouped() { if !Rewriter::sql_push_down_enabled() { @@ -651,7 +651,7 @@ WHERE .find_cube_scan_wrapped_sql() .wrapped_sql .sql - .contains("${KibanaSampleDataEcommerce.order_date} >= timestamptz")); + .contains("${KibanaSampleDataEcommerce.order_date} >= NOW()")); } #[tokio::test] @@ -1598,3 +1598,49 @@ async fn wrapper_typed_null() { .sql .contains("SUM(CAST(NULL AS DOUBLE))")); } + +/// Tests that exactly same expression in projection and filter have correct alias after rewriting +#[tokio::test] +async fn test_same_expression_in_projection_and_filter() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + DATE_TRUNC('day', CAST(dim_date0 AS TIMESTAMP)) +FROM MultiTypeCube +WHERE + DATE_TRUNC('day', CAST(dim_date0 AS TIMESTAMP)) >= + '2025-01-01' +GROUP BY + 1 +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + let dimensions = request.dimensions.unwrap(); + assert_eq!(dimensions.len(), 1); + let dimension = &dimensions[0]; + assert!(dimension.contains("DATE_TRUNC")); + let segments = request.segments.unwrap(); + assert_eq!(segments.len(), 1); + let segment = &segments[0]; + assert!(segment.contains("DATE_TRUNC")); +}