@@ -25,7 +25,7 @@ use crate::{
2525} ;
2626use datafusion:: {
2727 arrow:: datatypes:: DataType as ArrowDataType ,
28- logical_plan:: { Column , DFSchema , Operator } ,
28+ logical_plan:: { Column , DFSchema , Expr , Operator } ,
2929 physical_plan:: aggregates:: AggregateFunction ,
3030 scalar:: ScalarValue ,
3131} ;
@@ -128,7 +128,6 @@ impl RewriteRules for SplitRules {
128128 "?projection_alias" ,
129129 ) ,
130130 ) ,
131- // TODO: reaggregate rule requires aliases for all exprs in projection
132131 transforming_rewrite(
133132 "split-reaggregate-projection" ,
134133 projection(
@@ -2059,6 +2058,27 @@ impl RewriteRules for SplitRules {
20592058 ) ,
20602059 inner_aggregate_split_replacer( "?expr" , "?cube" ) ,
20612060 ) ,
2061+ rewrite(
2062+ "split-push-down-substr-outer-replacer-metabase" ,
2063+ // Reaggregation may not be possible in all cases and won't change the final result
2064+ // for SUBSTRING(column, 1, 1234) issued by Metabase
2065+ outer_projection_split_replacer(
2066+ fun_expr( "Substr" , vec![
2067+ column_expr( "?column" ) ,
2068+ literal_int( 1 ) ,
2069+ literal_int( 1234 ) ,
2070+ ] ) ,
2071+ "?alias_to_cube" ,
2072+ ) ,
2073+ fun_expr(
2074+ "Substr" ,
2075+ vec![
2076+ outer_projection_split_replacer( column_expr( "?column" ) , "?alias_to_cube" ) ,
2077+ literal_int( 1 ) ,
2078+ literal_int( 1234 ) ,
2079+ ] ,
2080+ ) ,
2081+ ) ,
20622082 // Alias
20632083 rewrite(
20642084 "split-push-down-alias-inner-replacer" ,
@@ -3676,8 +3696,7 @@ impl RewriteRules for SplitRules {
36763696 )
36773697 } ,
36783698 |_, _| true ,
3679- // TODO: change to false after post-aggregation improvements
3680- true ,
3699+ false ,
36813700 false ,
36823701 true ,
36833702 Some ( vec ! [ ( "?expr" , column_expr( "?column" ) ) ] ) ,
@@ -4522,30 +4541,27 @@ impl SplitRules {
45224541 for column in var_iter ! ( egraph[ subst[ column_var] ] , ColumnExprColumn ) . cloned ( ) {
45234542 if let Some ( ( _, cube) ) = meta. find_cube_by_column ( & alias_to_cube, & column) {
45244543 if let Some ( measure) = cube. lookup_measure ( & column. name ) {
4525- if measure. agg_type . is_none ( ) {
4526- continue ;
4544+ if let Some ( agg_type) = & measure. agg_type {
4545+ if let Some ( output_fun) = utils:: reaggragate_fun ( agg_type) {
4546+ subst. insert (
4547+ output_fun_var,
4548+ egraph. add (
4549+ LogicalPlanLanguage :: AggregateFunctionExprFun (
4550+ AggregateFunctionExprFun ( output_fun) ,
4551+ ) ,
4552+ ) ,
4553+ ) ;
4554+ subst. insert (
4555+ distinct_var,
4556+ egraph. add (
4557+ LogicalPlanLanguage :: AggregateFunctionExprDistinct (
4558+ AggregateFunctionExprDistinct ( false ) ,
4559+ ) ,
4560+ ) ,
4561+ ) ;
4562+ return true ;
4563+ }
45274564 }
4528-
4529- let output_fun = match measure. agg_type . as_ref ( ) . unwrap ( ) . as_str ( ) {
4530- "count" => AggregateFunction :: Sum ,
4531- "sum" => AggregateFunction :: Sum ,
4532- "min" => AggregateFunction :: Min ,
4533- "max" => AggregateFunction :: Max ,
4534- _ => continue ,
4535- } ;
4536- subst. insert (
4537- output_fun_var,
4538- egraph. add ( LogicalPlanLanguage :: AggregateFunctionExprFun (
4539- AggregateFunctionExprFun ( output_fun) ,
4540- ) ) ,
4541- ) ;
4542- subst. insert (
4543- distinct_var,
4544- egraph. add ( LogicalPlanLanguage :: AggregateFunctionExprDistinct (
4545- AggregateFunctionExprDistinct ( false ) ,
4546- ) ) ,
4547- ) ;
4548- return true ;
45494565 }
45504566 }
45514567 }
@@ -4615,6 +4631,7 @@ impl SplitRules {
46154631 let group_aggregate_cube_var = var ! ( group_aggregate_cube_var) ;
46164632 let new_expr_var = var ! ( new_expr_var) ;
46174633 let inner_projection_alias_var = var ! ( inner_projection_alias_var) ;
4634+ let meta = self . cube_context . meta . clone ( ) ;
46184635 move |egraph, subst| {
46194636 if let Some ( expr_to_alias) =
46204637 & egraph. index ( subst[ projection_expr_var] ) . data . expr_to_alias
@@ -4623,55 +4640,102 @@ impl SplitRules {
46234640 var_iter ! ( egraph[ subst[ alias_to_cube_var] ] , CubeScanAliasToCube ) . cloned ( )
46244641 {
46254642 // Replace outer projection columns with unqualified variants
4626- let expr = expr_to_alias
4643+ if let Some ( expr_name_to_alias ) = expr_to_alias
46274644 . clone ( )
46284645 . into_iter ( )
4629- . map ( |( _, a) | {
4630- let column = Column :: from_name ( a) ;
4631- let column_expr_column = egraph. add (
4632- LogicalPlanLanguage :: ColumnExprColumn ( ColumnExprColumn ( column) ) ,
4633- ) ;
4634- egraph. add ( LogicalPlanLanguage :: ColumnExpr ( [ column_expr_column] ) )
4646+ . map ( |( expr, alias, explicit) | {
4647+ let default_alias = Some ( ( alias. clone ( ) , None ) ) ;
4648+ if explicit == Some ( true ) {
4649+ return default_alias;
4650+ }
4651+ if let Expr :: Column ( column) = & expr {
4652+ if let Some ( ( _, cube) ) =
4653+ meta. find_cube_by_column ( & alias_to_cube, column)
4654+ {
4655+ if let Some ( measure) = cube. lookup_measure ( & column. name ) {
4656+ if let Some ( agg_type) = & measure. agg_type {
4657+ let aggr_expr = Expr :: AggregateFunction {
4658+ fun : utils:: reaggragate_fun ( & agg_type) ?,
4659+ args : vec ! [ expr] ,
4660+ distinct : false ,
4661+ } ;
4662+ let expr_name =
4663+ aggr_expr. name ( & DFSchema :: empty ( ) ) . ok ( ) ?;
4664+ return Some ( ( expr_name, Some ( alias) ) ) ;
4665+ }
4666+ }
4667+ }
4668+ }
4669+ default_alias
46354670 } )
4636- . collect :: < Vec < _ > > ( ) ;
4637- let mut projection_expr =
4638- egraph. add ( LogicalPlanLanguage :: ProjectionExpr ( vec ! [ ] ) ) ;
4639- for i in expr. into_iter ( ) . rev ( ) {
4640- projection_expr = egraph. add ( LogicalPlanLanguage :: ProjectionExpr ( vec ! [
4641- i,
4642- projection_expr,
4643- ] ) ) ;
4644- }
4645- subst. insert ( new_expr_var, projection_expr) ;
4671+ . collect :: < Option < Vec < _ > > > ( )
4672+ {
4673+ let expr = expr_name_to_alias
4674+ . into_iter ( )
4675+ . map ( |( name, alias) | {
4676+ let column = Column :: from_name ( name) ;
4677+ let column_expr_column = egraph. add (
4678+ LogicalPlanLanguage :: ColumnExprColumn ( ColumnExprColumn ( column) ) ,
4679+ ) ;
4680+ let column_expr = egraph
4681+ . add ( LogicalPlanLanguage :: ColumnExpr ( [ column_expr_column] ) ) ;
4682+ if let Some ( alias) = alias {
4683+ let alias_expr_alias = egraph. add (
4684+ LogicalPlanLanguage :: AliasExprAlias ( AliasExprAlias ( alias) ) ,
4685+ ) ;
4686+ return egraph. add ( LogicalPlanLanguage :: AliasExpr ( [
4687+ column_expr,
4688+ alias_expr_alias,
4689+ ] ) ) ;
4690+ }
4691+ column_expr
4692+ } )
4693+ . collect :: < Vec < _ > > ( ) ;
46464694
4647- subst. insert (
4648- inner_projection_alias_var,
4649- // Do not put alias on inner projection so table name from cube scan can be reused
4650- egraph. add ( LogicalPlanLanguage :: ProjectionAlias ( ProjectionAlias ( None ) ) ) ,
4651- ) ;
4695+ let mut projection_expr =
4696+ egraph. add ( LogicalPlanLanguage :: ProjectionExpr ( vec ! [ ] ) ) ;
4697+ for i in expr. into_iter ( ) . rev ( ) {
4698+ projection_expr =
4699+ egraph. add ( LogicalPlanLanguage :: ProjectionExpr ( vec ! [
4700+ i,
4701+ projection_expr,
4702+ ] ) ) ;
4703+ }
4704+ subst. insert ( new_expr_var, projection_expr) ;
46524705
4653- subst. insert (
4654- inner_aggregate_cube_var,
4655- egraph. add ( LogicalPlanLanguage :: InnerAggregateSplitReplacerAliasToCube (
4656- InnerAggregateSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4657- ) ) ,
4658- ) ;
4706+ subst. insert (
4707+ inner_projection_alias_var,
4708+ // Do not put alias on inner projection so table name from cube scan can be reused
4709+ egraph. add ( LogicalPlanLanguage :: ProjectionAlias ( ProjectionAlias ( None ) ) ) ,
4710+ ) ;
46594711
4660- subst. insert (
4661- group_expr_cube_var,
4662- egraph. add ( LogicalPlanLanguage :: GroupExprSplitReplacerAliasToCube (
4663- GroupExprSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4664- ) ) ,
4665- ) ;
4712+ subst. insert (
4713+ inner_aggregate_cube_var,
4714+ egraph. add (
4715+ LogicalPlanLanguage :: InnerAggregateSplitReplacerAliasToCube (
4716+ InnerAggregateSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4717+ ) ,
4718+ ) ,
4719+ ) ;
46664720
4667- subst. insert (
4668- group_aggregate_cube_var ,
4669- egraph. add ( LogicalPlanLanguage :: GroupAggregateSplitReplacerAliasToCube (
4670- GroupAggregateSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4671- ) ) ,
4672- ) ;
4721+ subst. insert (
4722+ group_expr_cube_var ,
4723+ egraph. add ( LogicalPlanLanguage :: GroupExprSplitReplacerAliasToCube (
4724+ GroupExprSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4725+ ) ) ,
4726+ ) ;
46734727
4674- return true ;
4728+ subst. insert (
4729+ group_aggregate_cube_var,
4730+ egraph. add (
4731+ LogicalPlanLanguage :: GroupAggregateSplitReplacerAliasToCube (
4732+ GroupAggregateSplitReplacerAliasToCube ( alias_to_cube. clone ( ) ) ,
4733+ ) ,
4734+ ) ,
4735+ ) ;
4736+
4737+ return true ;
4738+ }
46754739 }
46764740 }
46774741 false
0 commit comments