diff --git a/.github/workflows/rust-cubesql.yml b/.github/workflows/rust-cubesql.yml index 6cd78b68d5b3f..2aab0172c0b99 100644 --- a/.github/workflows/rust-cubesql.yml +++ b/.github/workflows/rust-cubesql.yml @@ -62,11 +62,7 @@ jobs: # We use host instead of cross container, because it's much faster runs-on: ubuntu-24.04 timeout-minutes: 60 - name: Unit (Rewrite Engine) (CUBESQL_TOP_DOWN_EXTRACTOR=${{ matrix.top-down-extractor }}) - strategy: - matrix: - top-down-extractor: ['true', 'false'] - fail-fast: false + name: Unit (Rewrite Engine) steps: - name: Checkout @@ -94,7 +90,6 @@ jobs: CUBESQL_TESTING_CUBE_TOKEN: ${{ secrets.CUBESQL_TESTING_CUBE_TOKEN }} CUBESQL_TESTING_CUBE_URL: ${{ secrets.CUBESQL_TESTING_CUBE_URL }} CUBESQL_SQL_PUSH_DOWN: true - CUBESQL_TOP_DOWN_EXTRACTOR: ${{ matrix.top-down-extractor }} CUBESQL_REWRITE_CACHE: true CUBESQL_REWRITE_TIMEOUT: 60 run: | diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index d145131eeab39..b9dbf91c0f2b9 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -13962,11 +13962,7 @@ ORDER BY "source"."str0" ASC // CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10)) if Rewriter::sql_push_down_enabled() { let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql; - if Rewriter::top_down_extractor_enabled() { - assert!(sql.contains("LIMIT 1000")); - } else { - assert!(sql.contains("\"limit\": 1000")); - } + assert!(sql.contains("LIMIT 1000")); assert!(sql.contains("% 7")); let physical_plan = query_plan.as_physical_plan().await.unwrap(); @@ -16175,18 +16171,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { dimension: "KibanaSampleDataEcommerce.order_date".to_string(), granularity: Some("month".to_string()), - date_range: if Rewriter::top_down_extractor_enabled() { - Some(json!(vec![ - "2019-01-01T00:00:00.000Z".to_string(), - "2019-01-31T23:59:59.999Z".to_string() - ])) - } else { - // Non-optimal variant with top down extractor disabled - Some(json!(vec![ - "2019-01-01 00:00:00.000".to_string(), - "2019-01-31 23:59:59.999".to_string() - ])) - } + date_range: Some(json!(vec![ + "2019-01-01T00:00:00.000Z".to_string(), + "2019-01-31T23:59:59.999Z".to_string() + ])) }]), order: Some(vec![]), ..Default::default() diff --git a/rust/cubesql/cubesql/src/compile/query_engine.rs b/rust/cubesql/cubesql/src/compile/query_engine.rs index b6b3773ab7186..142f30b914afe 100644 --- a/rust/cubesql/cubesql/src/compile/query_engine.rs +++ b/rust/cubesql/cubesql/src/compile/query_engine.rs @@ -226,7 +226,6 @@ pub trait QueryEngine { state.auth_context().unwrap(), qtrace, span_id.clone(), - self.config_ref().top_down_extractor(), ) .await .map_err(|e| match e.cause { diff --git a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs index 0246b93f159ad..20fb584c68a53 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/cost.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/cost.rs @@ -8,7 +8,7 @@ use crate::{ }, transport::{MetaContext, V1CubeMetaDimensionExt}, }; -use egg::{Analysis, CostFunction, EGraph, Id, Language, RecExpr}; +use egg::{Analysis, EGraph, Id, Language, RecExpr}; use indexmap::IndexSet; #[derive(Debug)] @@ -25,7 +25,7 @@ impl BestCubePlan { } } - pub fn initial_cost(&self, enode: &LogicalPlanLanguage, top_down: bool) -> CubePlanCost { + pub fn initial_cost(&self, enode: &LogicalPlanLanguage) -> CubePlanCost { let table_scans = match enode { LogicalPlanLanguage::TableScan(_) => 1, _ => 0, @@ -52,8 +52,7 @@ impl BestCubePlan { }; let non_pushed_down_limit_sort = match enode { - LogicalPlanLanguage::Limit(_) if !top_down => 1, - LogicalPlanLanguage::Sort(_) if top_down => 1, + LogicalPlanLanguage::Sort(_) => 1, _ => 0, }; @@ -248,7 +247,6 @@ impl BestCubePlan { #[derive(Clone, Copy)] pub struct CubePlanCostOptions { - top_down: bool, penalize_post_processing: bool, } @@ -312,17 +310,6 @@ pub enum CubePlanState { Wrapper, } -impl CubePlanState { - pub fn add_child(&self, other: &Self) -> Self { - match (self, other) { - (CubePlanState::Wrapper, _) => CubePlanState::Wrapper, - (_, CubePlanState::Wrapped) => CubePlanState::Wrapped, - (CubePlanState::Wrapped, _) => CubePlanState::Wrapped, - (CubePlanState::Unwrapped(a), _) => CubePlanState::Unwrapped(*a), - } - } -} - #[derive(Debug, Clone, Eq, Hash, PartialEq)] pub enum SortState { None, @@ -330,55 +317,6 @@ pub enum SortState { DirectChild, } -impl SortState { - pub fn add_child(&self, other: &Self) -> Self { - match (self, other) { - (Self::Current, _) => Self::Current, - (_, Self::Current) | (Self::DirectChild, _) => Self::DirectChild, - _ => Self::None, - } - } -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct CubePlanCostAndState { - pub cost: CubePlanCost, - pub state: CubePlanState, - pub sort_state: SortState, -} - -impl PartialOrd for CubePlanCostAndState { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cost.cmp(&other.cost)) - } -} - -impl Ord for CubePlanCostAndState { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.cost.cmp(&other.cost) - } -} - -impl CubePlanCostAndState { - pub fn add_child(&self, other: &Self) -> Self { - Self { - cost: self.cost.add_child(&other.cost), - state: self.state.add_child(&other.state), - sort_state: self.sort_state.add_child(&other.sort_state), - } - } - - pub fn finalize(&self, enode: &LogicalPlanLanguage, options: CubePlanCostOptions) -> Self { - Self { - cost: self - .cost - .finalize(&self.state, &self.sort_state, enode, options), - state: self.state.clone(), - sort_state: self.sort_state.clone(), - } - } -} - impl CubePlanCost { pub fn add_child(&self, other: &Self) -> Self { Self { @@ -469,7 +407,7 @@ impl CubePlanCost { }, non_pushed_down_limit_sort: match sort_state { SortState::DirectChild => self.non_pushed_down_limit_sort, - SortState::Current if options.top_down => self.non_pushed_down_limit_sort, + SortState::Current => self.non_pushed_down_limit_sort, _ => 0, }, // Don't track state here: we want representation that have fewer wrappers with zero members _in total_ @@ -520,60 +458,6 @@ impl CubePlanCost { } } -impl CostFunction for BestCubePlan { - type Cost = CubePlanCostAndState; - fn cost(&mut self, enode: &LogicalPlanLanguage, mut costs: C) -> Self::Cost - where - C: FnMut(Id) -> Self::Cost, - { - let ast_size_outside_wrapper = match enode { - LogicalPlanLanguage::Aggregate(_) => 1, - LogicalPlanLanguage::Projection(_) => 1, - LogicalPlanLanguage::Limit(_) => 1, - LogicalPlanLanguage::Sort(_) => 1, - LogicalPlanLanguage::Filter(_) => 1, - LogicalPlanLanguage::Join(_) => 1, - LogicalPlanLanguage::CrossJoin(_) => 1, - LogicalPlanLanguage::Union(_) => 1, - LogicalPlanLanguage::Window(_) => 1, - LogicalPlanLanguage::Subquery(_) => 1, - LogicalPlanLanguage::Distinct(_) => 1, - _ => 0, - }; - - let cost = self.initial_cost(enode, false); - let initial_cost = CubePlanCostAndState { - cost, - state: match enode { - LogicalPlanLanguage::CubeScanWrapped(CubeScanWrapped(true)) => { - CubePlanState::Wrapped - } - LogicalPlanLanguage::CubeScanWrapper(_) => CubePlanState::Wrapper, - _ => CubePlanState::Unwrapped(ast_size_outside_wrapper), - }, - sort_state: match enode { - LogicalPlanLanguage::Sort(_) => SortState::Current, - _ => SortState::None, - }, - }; - let res = enode - .children() - .iter() - .fold(initial_cost.clone(), |cost, id| { - let child = costs(*id); - cost.add_child(&child) - }) - .finalize( - enode, - CubePlanCostOptions { - top_down: false, - penalize_post_processing: self.penalize_post_processing, - }, - ); - res - } -} - pub trait TopDownCost: Clone + Debug + PartialOrd { fn add(&self, other: &Self) -> Self; } @@ -902,7 +786,7 @@ impl TopDownState for CubePlanTopDownState { impl TopDownCostFunction for BestCubePlan { fn cost(&self, node: &LogicalPlanLanguage) -> CubePlanCost { - self.initial_cost(node, true) + self.initial_cost(node) } fn finalize( @@ -917,7 +801,6 @@ impl TopDownCostFunction, span_id: Option>, - top_down_extractor: bool, ) -> Result { let cube_context = self.cube_context.clone(); let egraph = self.graph.clone(); @@ -361,26 +360,16 @@ impl Rewriter { Self::run_rewrites(&cube_context, egraph, rules, "final")?; // TODO maybe check replacers and penalized_ast_size_outside_wrapper right after extraction? - let best = if top_down_extractor { - let mut extractor = TopDownExtractor::new( - &runner.egraph, - BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing), - CubePlanTopDownState::new(), - ); - let Some((best_cost, best)) = extractor.find_best(root) else { - return Err(CubeError::internal("Unable to find best plan".to_string())); - }; - log::debug!("Best cost: {:#?}", best_cost); - best - } else { - let extractor = Extractor::new( - &runner.egraph, - BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing), - ); - let (best_cost, best) = extractor.find_best(root); - log::debug!("Best cost: {:#?}", best_cost); - best + let mut extractor = TopDownExtractor::new( + &runner.egraph, + BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing), + CubePlanTopDownState::new(), + ); + let Some((best_cost, best)) = extractor.find_best(root) else { + return Err(CubeError::internal("Unable to find best plan".to_string())); }; + log::debug!("Best cost: {:#?}", best_cost); + let qtrace_best_graph = if Qtrace::is_enabled() { best.as_ref().to_vec() } else { @@ -474,12 +463,6 @@ impl Rewriter { .unwrap_or(true) } - pub fn top_down_extractor_enabled() -> bool { - env::var("CUBESQL_TOP_DOWN_EXTRACTOR") - .map(|v| v.to_lowercase() != "false") - .unwrap_or(true) - } - pub fn rewrite_rules( meta_context: Arc, config_obj: Arc, diff --git a/rust/cubesql/cubesql/src/config/mod.rs b/rust/cubesql/cubesql/src/config/mod.rs index 9f49884f26488..a3ed43d99077d 100644 --- a/rust/cubesql/cubesql/src/config/mod.rs +++ b/rust/cubesql/cubesql/src/config/mod.rs @@ -115,8 +115,6 @@ pub trait ConfigObj: DIService + Debug { fn max_sessions(&self) -> usize; fn no_implicit_order(&self) -> bool; - - fn top_down_extractor(&self) -> bool; } #[derive(Debug, Clone)] @@ -137,7 +135,6 @@ pub struct ConfigObjImpl { pub non_streaming_query_max_row_limit: i32, pub max_sessions: usize, pub no_implicit_order: bool, - pub top_down_extractor: bool, } impl ConfigObjImpl { @@ -175,7 +172,6 @@ impl ConfigObjImpl { non_streaming_query_max_row_limit: env_parse("CUBEJS_DB_QUERY_LIMIT", 50000), max_sessions: env_parse("CUBEJS_MAX_SESSIONS", 1024), no_implicit_order: env_parse("CUBESQL_SQL_NO_IMPLICIT_ORDER", true), - top_down_extractor: env_parse("CUBESQL_TOP_DOWN_EXTRACTOR", true), } } } @@ -242,10 +238,6 @@ impl ConfigObj for ConfigObjImpl { fn max_sessions(&self) -> usize { self.max_sessions } - - fn top_down_extractor(&self) -> bool { - self.top_down_extractor - } } impl Config { @@ -278,7 +270,6 @@ impl Config { non_streaming_query_max_row_limit: 50000, max_sessions: 1024, no_implicit_order: true, - top_down_extractor: true, }), } }