Skip to content

Commit e2a6a55

Browse files
committed
feat(cubesql): Remove bottom-up extraction completely
1 parent 8c4566c commit e2a6a55

File tree

6 files changed

+21
-182
lines changed

6 files changed

+21
-182
lines changed

.github/workflows/rust-cubesql.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,7 @@ jobs:
6262
# We use host instead of cross container, because it's much faster
6363
runs-on: ubuntu-24.04
6464
timeout-minutes: 60
65-
name: Unit (Rewrite Engine) (CUBESQL_TOP_DOWN_EXTRACTOR=${{ matrix.top-down-extractor }})
66-
strategy:
67-
matrix:
68-
top-down-extractor: ['true', 'false']
69-
fail-fast: false
65+
name: Unit (Rewrite Engine)
7066

7167
steps:
7268
- name: Checkout
@@ -94,7 +90,6 @@ jobs:
9490
CUBESQL_TESTING_CUBE_TOKEN: ${{ secrets.CUBESQL_TESTING_CUBE_TOKEN }}
9591
CUBESQL_TESTING_CUBE_URL: ${{ secrets.CUBESQL_TESTING_CUBE_URL }}
9692
CUBESQL_SQL_PUSH_DOWN: true
97-
CUBESQL_TOP_DOWN_EXTRACTOR: ${{ matrix.top-down-extractor }}
9893
CUBESQL_REWRITE_CACHE: true
9994
CUBESQL_REWRITE_TIMEOUT: 60
10095
run: |

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13920,11 +13920,7 @@ ORDER BY "source"."str0" ASC
1392013920
// CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10))
1392113921
if Rewriter::sql_push_down_enabled() {
1392213922
let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql;
13923-
if Rewriter::top_down_extractor_enabled() {
13924-
assert!(sql.contains("LIMIT 1000"));
13925-
} else {
13926-
assert!(sql.contains("\"limit\": 1000"));
13927-
}
13923+
assert!(sql.contains("LIMIT 1000"));
1392813924
assert!(sql.contains("% 7"));
1392913925

1393013926
let physical_plan = query_plan.as_physical_plan().await.unwrap();
@@ -16055,18 +16051,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1605516051
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
1605616052
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
1605716053
granularity: Some("month".to_string()),
16058-
date_range: if Rewriter::top_down_extractor_enabled() {
16059-
Some(json!(vec![
16060-
"2019-01-01T00:00:00.000Z".to_string(),
16061-
"2019-01-31T23:59:59.999Z".to_string()
16062-
]))
16063-
} else {
16064-
// Non-optimal variant with top down extractor disabled
16065-
Some(json!(vec![
16066-
"2019-01-01 00:00:00.000".to_string(),
16067-
"2019-01-31 23:59:59.999".to_string()
16068-
]))
16069-
}
16054+
date_range: Some(json!(vec![
16055+
"2019-01-01T00:00:00.000Z".to_string(),
16056+
"2019-01-31T23:59:59.999Z".to_string()
16057+
]))
1607016058
}]),
1607116059
order: Some(vec![]),
1607216060
..Default::default()

rust/cubesql/cubesql/src/compile/query_engine.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ pub trait QueryEngine {
226226
state.auth_context().unwrap(),
227227
qtrace,
228228
span_id.clone(),
229-
self.config_ref().top_down_extractor(),
230229
)
231230
.await
232231
.map_err(|e| match e.cause {

rust/cubesql/cubesql/src/compile/rewrite/cost.rs

Lines changed: 5 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::{
88
},
99
transport::{MetaContext, V1CubeMetaDimensionExt},
1010
};
11-
use egg::{Analysis, CostFunction, EGraph, Id, Language, RecExpr};
11+
use egg::{Analysis, EGraph, Id, Language, RecExpr};
1212
use indexmap::IndexSet;
1313

1414
#[derive(Debug)]
@@ -25,7 +25,7 @@ impl BestCubePlan {
2525
}
2626
}
2727

28-
pub fn initial_cost(&self, enode: &LogicalPlanLanguage, top_down: bool) -> CubePlanCost {
28+
pub fn initial_cost(&self, enode: &LogicalPlanLanguage) -> CubePlanCost {
2929
let table_scans = match enode {
3030
LogicalPlanLanguage::TableScan(_) => 1,
3131
_ => 0,
@@ -52,8 +52,7 @@ impl BestCubePlan {
5252
};
5353

5454
let non_pushed_down_limit_sort = match enode {
55-
LogicalPlanLanguage::Limit(_) if !top_down => 1,
56-
LogicalPlanLanguage::Sort(_) if top_down => 1,
55+
LogicalPlanLanguage::Sort(_) => 1,
5756
_ => 0,
5857
};
5958

@@ -247,7 +246,6 @@ impl BestCubePlan {
247246

248247
#[derive(Clone, Copy)]
249248
pub struct CubePlanCostOptions {
250-
top_down: bool,
251249
penalize_post_processing: bool,
252250
}
253251

@@ -311,73 +309,13 @@ pub enum CubePlanState {
311309
Wrapper,
312310
}
313311

314-
impl CubePlanState {
315-
pub fn add_child(&self, other: &Self) -> Self {
316-
match (self, other) {
317-
(CubePlanState::Wrapper, _) => CubePlanState::Wrapper,
318-
(_, CubePlanState::Wrapped) => CubePlanState::Wrapped,
319-
(CubePlanState::Wrapped, _) => CubePlanState::Wrapped,
320-
(CubePlanState::Unwrapped(a), _) => CubePlanState::Unwrapped(*a),
321-
}
322-
}
323-
}
324-
325312
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
326313
pub enum SortState {
327314
None,
328315
Current,
329316
DirectChild,
330317
}
331318

332-
impl SortState {
333-
pub fn add_child(&self, other: &Self) -> Self {
334-
match (self, other) {
335-
(Self::Current, _) => Self::Current,
336-
(_, Self::Current) | (Self::DirectChild, _) => Self::DirectChild,
337-
_ => Self::None,
338-
}
339-
}
340-
}
341-
342-
#[derive(Debug, Clone, Eq, PartialEq)]
343-
pub struct CubePlanCostAndState {
344-
pub cost: CubePlanCost,
345-
pub state: CubePlanState,
346-
pub sort_state: SortState,
347-
}
348-
349-
impl PartialOrd for CubePlanCostAndState {
350-
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
351-
Some(self.cost.cmp(&other.cost))
352-
}
353-
}
354-
355-
impl Ord for CubePlanCostAndState {
356-
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
357-
self.cost.cmp(&other.cost)
358-
}
359-
}
360-
361-
impl CubePlanCostAndState {
362-
pub fn add_child(&self, other: &Self) -> Self {
363-
Self {
364-
cost: self.cost.add_child(&other.cost),
365-
state: self.state.add_child(&other.state),
366-
sort_state: self.sort_state.add_child(&other.sort_state),
367-
}
368-
}
369-
370-
pub fn finalize(&self, enode: &LogicalPlanLanguage, options: CubePlanCostOptions) -> Self {
371-
Self {
372-
cost: self
373-
.cost
374-
.finalize(&self.state, &self.sort_state, enode, options),
375-
state: self.state.clone(),
376-
sort_state: self.sort_state.clone(),
377-
}
378-
}
379-
}
380-
381319
impl CubePlanCost {
382320
pub fn add_child(&self, other: &Self) -> Self {
383321
Self {
@@ -468,7 +406,7 @@ impl CubePlanCost {
468406
},
469407
non_pushed_down_limit_sort: match sort_state {
470408
SortState::DirectChild => self.non_pushed_down_limit_sort,
471-
SortState::Current if options.top_down => self.non_pushed_down_limit_sort,
409+
SortState::Current => self.non_pushed_down_limit_sort,
472410
_ => 0,
473411
},
474412
// Don't track state here: we want representation that have fewer wrappers with zero members _in total_
@@ -519,60 +457,6 @@ impl CubePlanCost {
519457
}
520458
}
521459

522-
impl CostFunction<LogicalPlanLanguage> for BestCubePlan {
523-
type Cost = CubePlanCostAndState;
524-
fn cost<C>(&mut self, enode: &LogicalPlanLanguage, mut costs: C) -> Self::Cost
525-
where
526-
C: FnMut(Id) -> Self::Cost,
527-
{
528-
let ast_size_outside_wrapper = match enode {
529-
LogicalPlanLanguage::Aggregate(_) => 1,
530-
LogicalPlanLanguage::Projection(_) => 1,
531-
LogicalPlanLanguage::Limit(_) => 1,
532-
LogicalPlanLanguage::Sort(_) => 1,
533-
LogicalPlanLanguage::Filter(_) => 1,
534-
LogicalPlanLanguage::Join(_) => 1,
535-
LogicalPlanLanguage::CrossJoin(_) => 1,
536-
LogicalPlanLanguage::Union(_) => 1,
537-
LogicalPlanLanguage::Window(_) => 1,
538-
LogicalPlanLanguage::Subquery(_) => 1,
539-
LogicalPlanLanguage::Distinct(_) => 1,
540-
_ => 0,
541-
};
542-
543-
let cost = self.initial_cost(enode, false);
544-
let initial_cost = CubePlanCostAndState {
545-
cost,
546-
state: match enode {
547-
LogicalPlanLanguage::CubeScanWrapped(CubeScanWrapped(true)) => {
548-
CubePlanState::Wrapped
549-
}
550-
LogicalPlanLanguage::CubeScanWrapper(_) => CubePlanState::Wrapper,
551-
_ => CubePlanState::Unwrapped(ast_size_outside_wrapper),
552-
},
553-
sort_state: match enode {
554-
LogicalPlanLanguage::Sort(_) => SortState::Current,
555-
_ => SortState::None,
556-
},
557-
};
558-
let res = enode
559-
.children()
560-
.iter()
561-
.fold(initial_cost.clone(), |cost, id| {
562-
let child = costs(*id);
563-
cost.add_child(&child)
564-
})
565-
.finalize(
566-
enode,
567-
CubePlanCostOptions {
568-
top_down: false,
569-
penalize_post_processing: self.penalize_post_processing,
570-
},
571-
);
572-
res
573-
}
574-
}
575-
576460
pub trait TopDownCost: Clone + Debug + PartialOrd {
577461
fn add(&self, other: &Self) -> Self;
578462
}
@@ -901,7 +785,7 @@ impl TopDownState<LogicalPlanLanguage> for CubePlanTopDownState {
901785

902786
impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost> for BestCubePlan {
903787
fn cost(&self, node: &LogicalPlanLanguage) -> CubePlanCost {
904-
self.initial_cost(node, true)
788+
self.initial_cost(node)
905789
}
906790

907791
fn finalize(
@@ -916,7 +800,6 @@ impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost
916800
&state.limit,
917801
node,
918802
CubePlanCostOptions {
919-
top_down: true,
920803
penalize_post_processing: self.penalize_post_processing,
921804
},
922805
)

rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use crate::{
2323
use datafusion::{
2424
logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner, scalar::ScalarValue,
2525
};
26-
use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason};
26+
use egg::{EGraph, Id, IterationData, Language, Rewrite, Runner, StopReason};
2727
use serde::{Deserialize, Serialize};
2828
use std::{
2929
collections::{HashMap, HashSet},
@@ -330,7 +330,6 @@ impl Rewriter {
330330
auth_context: AuthContextRef,
331331
qtrace: &mut Option<Qtrace>,
332332
span_id: Option<Arc<SpanId>>,
333-
top_down_extractor: bool,
334333
) -> Result<LogicalPlan, CubeError> {
335334
let cube_context = self.cube_context.clone();
336335
let egraph = self.graph.clone();
@@ -361,26 +360,16 @@ impl Rewriter {
361360
Self::run_rewrites(&cube_context, egraph, rules, "final")?;
362361

363362
// TODO maybe check replacers and penalized_ast_size_outside_wrapper right after extraction?
364-
let best = if top_down_extractor {
365-
let mut extractor = TopDownExtractor::new(
366-
&runner.egraph,
367-
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
368-
CubePlanTopDownState::new(),
369-
);
370-
let Some((best_cost, best)) = extractor.find_best(root) else {
371-
return Err(CubeError::internal("Unable to find best plan".to_string()));
372-
};
373-
log::debug!("Best cost: {:#?}", best_cost);
374-
best
375-
} else {
376-
let extractor = Extractor::new(
377-
&runner.egraph,
378-
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
379-
);
380-
let (best_cost, best) = extractor.find_best(root);
381-
log::debug!("Best cost: {:#?}", best_cost);
382-
best
363+
let mut extractor = TopDownExtractor::new(
364+
&runner.egraph,
365+
BestCubePlan::new(cube_context.meta.clone(), penalize_post_processing),
366+
CubePlanTopDownState::new(),
367+
);
368+
let Some((best_cost, best)) = extractor.find_best(root) else {
369+
return Err(CubeError::internal("Unable to find best plan".to_string()));
383370
};
371+
log::debug!("Best cost: {:#?}", best_cost);
372+
384373
let qtrace_best_graph = if Qtrace::is_enabled() {
385374
best.as_ref().to_vec()
386375
} else {
@@ -474,12 +463,6 @@ impl Rewriter {
474463
.unwrap_or(true)
475464
}
476465

477-
pub fn top_down_extractor_enabled() -> bool {
478-
env::var("CUBESQL_TOP_DOWN_EXTRACTOR")
479-
.map(|v| v.to_lowercase() != "false")
480-
.unwrap_or(true)
481-
}
482-
483466
pub fn rewrite_rules(
484467
meta_context: Arc<MetaContext>,
485468
config_obj: Arc<dyn ConfigObj>,

rust/cubesql/cubesql/src/config/mod.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,6 @@ pub trait ConfigObj: DIService + Debug {
115115
fn max_sessions(&self) -> usize;
116116

117117
fn no_implicit_order(&self) -> bool;
118-
119-
fn top_down_extractor(&self) -> bool;
120118
}
121119

122120
#[derive(Debug, Clone)]
@@ -137,7 +135,6 @@ pub struct ConfigObjImpl {
137135
pub non_streaming_query_max_row_limit: i32,
138136
pub max_sessions: usize,
139137
pub no_implicit_order: bool,
140-
pub top_down_extractor: bool,
141138
}
142139

143140
impl ConfigObjImpl {
@@ -175,7 +172,6 @@ impl ConfigObjImpl {
175172
non_streaming_query_max_row_limit: env_parse("CUBEJS_DB_QUERY_LIMIT", 50000),
176173
max_sessions: env_parse("CUBEJS_MAX_SESSIONS", 1024),
177174
no_implicit_order: env_parse("CUBESQL_SQL_NO_IMPLICIT_ORDER", true),
178-
top_down_extractor: env_parse("CUBESQL_TOP_DOWN_EXTRACTOR", true),
179175
}
180176
}
181177
}
@@ -242,10 +238,6 @@ impl ConfigObj for ConfigObjImpl {
242238
fn max_sessions(&self) -> usize {
243239
self.max_sessions
244240
}
245-
246-
fn top_down_extractor(&self) -> bool {
247-
self.top_down_extractor
248-
}
249241
}
250242

251243
impl Config {
@@ -278,7 +270,6 @@ impl Config {
278270
non_streaming_query_max_row_limit: 50000,
279271
max_sessions: 1024,
280272
no_implicit_order: true,
281-
top_down_extractor: true,
282273
}),
283274
}
284275
}

0 commit comments

Comments
 (0)