Skip to content

Commit 26cf8ef

Browse files
committed
feat(cubesql): Remove bottom-up extraction completely
1 parent 1da80db commit 26cf8ef

File tree

6 files changed

+22
-155
lines changed

6 files changed

+22
-155
lines changed

.github/workflows/rust-cubesql.yml

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,7 @@ jobs:
6262
# We use host instead of cross container, because it's much faster
6363
runs-on: ubuntu-24.04
6464
timeout-minutes: 60
65-
name: Unit (Rewrite Engine) (CUBESQL_TOP_DOWN_EXTRACTOR=${{ matrix.top-down-extractor }})
66-
strategy:
67-
matrix:
68-
top-down-extractor: ['true', 'false']
69-
fail-fast: false
65+
name: Unit (Rewrite Engine)
7066

7167
steps:
7268
- name: Checkout
@@ -94,7 +90,6 @@ jobs:
9490
CUBESQL_TESTING_CUBE_TOKEN: ${{ secrets.CUBESQL_TESTING_CUBE_TOKEN }}
9591
CUBESQL_TESTING_CUBE_URL: ${{ secrets.CUBESQL_TESTING_CUBE_URL }}
9692
CUBESQL_SQL_PUSH_DOWN: true
97-
CUBESQL_TOP_DOWN_EXTRACTOR: ${{ matrix.top-down-extractor }}
9893
CUBESQL_REWRITE_CACHE: true
9994
CUBESQL_REWRITE_TIMEOUT: 60
10095
run: |

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -13735,11 +13735,7 @@ ORDER BY "source"."str0" ASC
1373513735
// CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10))
1373613736
if Rewriter::sql_push_down_enabled() {
1373713737
let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql;
13738-
if Rewriter::top_down_extractor_enabled() {
13739-
assert!(sql.contains("LIMIT 1000"));
13740-
} else {
13741-
assert!(sql.contains("\"limit\": 1000"));
13742-
}
13738+
assert!(sql.contains("LIMIT 1000"));
1374313739
assert!(sql.contains("% 7"));
1374413740

1374513741
let physical_plan = query_plan.as_physical_plan().await.unwrap();
@@ -15832,18 +15828,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1583215828
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
1583315829
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
1583415830
granularity: Some("month".to_string()),
15835-
date_range: if Rewriter::top_down_extractor_enabled() {
15836-
Some(json!(vec![
15837-
"2019-01-01T00:00:00.000Z".to_string(),
15838-
"2019-01-31T23:59:59.999Z".to_string()
15839-
]))
15840-
} else {
15841-
// Non-optimal variant with top down extractor disabled
15842-
Some(json!(vec![
15843-
"2019-01-01 00:00:00.000".to_string(),
15844-
"2019-01-31 23:59:59.999".to_string()
15845-
]))
15846-
}
15831+
date_range: Some(json!(vec![
15832+
"2019-01-01T00:00:00.000Z".to_string(),
15833+
"2019-01-31T23:59:59.999Z".to_string()
15834+
]))
1584715835
}]),
1584815836
order: Some(vec![]),
1584915837
..Default::default()

rust/cubesql/cubesql/src/compile/query_engine.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,6 @@ pub trait QueryEngine {
225225
state.auth_context().unwrap(),
226226
qtrace,
227227
span_id.clone(),
228-
self.config_ref().top_down_extractor(),
229228
)
230229
.await
231230
.map_err(|e| match e.cause {

rust/cubesql/cubesql/src/compile/rewrite/cost.rs

Lines changed: 6 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ use crate::{
88
},
99
transport::{MetaContext, V1CubeMetaDimensionExt},
1010
};
11-
use egg::{Analysis, CostFunction, EGraph, Id, Language, RecExpr};
11+
use egg::{Analysis, EGraph, Id, Language, RecExpr};
1212
use indexmap::IndexSet;
1313

1414
#[derive(Debug)]
@@ -21,7 +21,7 @@ impl BestCubePlan {
2121
Self { meta_context }
2222
}
2323

24-
pub fn initial_cost(&self, enode: &LogicalPlanLanguage, top_down: bool) -> CubePlanCost {
24+
pub fn initial_cost(&self, enode: &LogicalPlanLanguage) -> CubePlanCost {
2525
let table_scans = match enode {
2626
LogicalPlanLanguage::TableScan(_) => 1,
2727
_ => 0,
@@ -48,8 +48,7 @@ impl BestCubePlan {
4848
};
4949

5050
let non_pushed_down_limit_sort = match enode {
51-
LogicalPlanLanguage::Limit(_) if !top_down => 1,
52-
LogicalPlanLanguage::Sort(_) if top_down => 1,
51+
LogicalPlanLanguage::Sort(_) => 1,
5352
_ => 0,
5453
};
5554

@@ -315,45 +314,6 @@ impl SortState {
315314
}
316315
}
317316

318-
#[derive(Debug, Clone, Eq, PartialEq)]
319-
pub struct CubePlanCostAndState {
320-
pub cost: CubePlanCost,
321-
pub state: CubePlanState,
322-
pub sort_state: SortState,
323-
}
324-
325-
impl PartialOrd for CubePlanCostAndState {
326-
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
327-
Some(self.cost.cmp(&other.cost))
328-
}
329-
}
330-
331-
impl Ord for CubePlanCostAndState {
332-
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
333-
self.cost.cmp(&other.cost)
334-
}
335-
}
336-
337-
impl CubePlanCostAndState {
338-
pub fn add_child(&self, other: &Self) -> Self {
339-
Self {
340-
cost: self.cost.add_child(&other.cost),
341-
state: self.state.add_child(&other.state),
342-
sort_state: self.sort_state.add_child(&other.sort_state),
343-
}
344-
}
345-
346-
pub fn finalize(&self, enode: &LogicalPlanLanguage) -> Self {
347-
Self {
348-
cost: self
349-
.cost
350-
.finalize(&self.state, &self.sort_state, enode, false),
351-
state: self.state.clone(),
352-
sort_state: self.sort_state.clone(),
353-
}
354-
}
355-
}
356-
357317
impl CubePlanCost {
358318
pub fn add_child(&self, other: &Self) -> Self {
359319
Self {
@@ -407,7 +367,6 @@ impl CubePlanCost {
407367
state: &CubePlanState,
408368
sort_state: &SortState,
409369
enode: &LogicalPlanLanguage,
410-
top_down: bool,
411370
) -> Self {
412371
Self {
413372
replacers: self.replacers,
@@ -428,7 +387,7 @@ impl CubePlanCost {
428387
},
429388
non_pushed_down_limit_sort: match sort_state {
430389
SortState::DirectChild => self.non_pushed_down_limit_sort,
431-
SortState::Current if top_down => self.non_pushed_down_limit_sort,
390+
SortState::Current => self.non_pushed_down_limit_sort,
432391
_ => 0,
433392
},
434393
// Don't track state here: we want representation that have fewer wrappers with zero members _in total_
@@ -482,54 +441,6 @@ impl CubePlanCost {
482441
}
483442
}
484443

485-
impl CostFunction<LogicalPlanLanguage> for BestCubePlan {
486-
type Cost = CubePlanCostAndState;
487-
fn cost<C>(&mut self, enode: &LogicalPlanLanguage, mut costs: C) -> Self::Cost
488-
where
489-
C: FnMut(Id) -> Self::Cost,
490-
{
491-
let ast_size_outside_wrapper = match enode {
492-
LogicalPlanLanguage::Aggregate(_) => 1,
493-
LogicalPlanLanguage::Projection(_) => 1,
494-
LogicalPlanLanguage::Limit(_) => 1,
495-
LogicalPlanLanguage::Sort(_) => 1,
496-
LogicalPlanLanguage::Filter(_) => 1,
497-
LogicalPlanLanguage::Join(_) => 1,
498-
LogicalPlanLanguage::CrossJoin(_) => 1,
499-
LogicalPlanLanguage::Union(_) => 1,
500-
LogicalPlanLanguage::Window(_) => 1,
501-
LogicalPlanLanguage::Subquery(_) => 1,
502-
LogicalPlanLanguage::Distinct(_) => 1,
503-
_ => 0,
504-
};
505-
506-
let cost = self.initial_cost(enode, false);
507-
let initial_cost = CubePlanCostAndState {
508-
cost,
509-
state: match enode {
510-
LogicalPlanLanguage::CubeScanWrapped(CubeScanWrapped(true)) => {
511-
CubePlanState::Wrapped
512-
}
513-
LogicalPlanLanguage::CubeScanWrapper(_) => CubePlanState::Wrapper,
514-
_ => CubePlanState::Unwrapped(ast_size_outside_wrapper),
515-
},
516-
sort_state: match enode {
517-
LogicalPlanLanguage::Sort(_) => SortState::Current,
518-
_ => SortState::None,
519-
},
520-
};
521-
let res = enode
522-
.children()
523-
.iter()
524-
.fold(initial_cost.clone(), |cost, id| {
525-
let child = costs(*id);
526-
cost.add_child(&child)
527-
})
528-
.finalize(enode);
529-
res
530-
}
531-
}
532-
533444
pub trait TopDownCost: Clone + Debug + PartialOrd {
534445
fn add(&self, other: &Self) -> Self;
535446
}
@@ -858,7 +769,7 @@ impl TopDownState<LogicalPlanLanguage> for CubePlanTopDownState {
858769

859770
impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost> for BestCubePlan {
860771
fn cost(&self, node: &LogicalPlanLanguage) -> CubePlanCost {
861-
self.initial_cost(node, true)
772+
self.initial_cost(node)
862773
}
863774

864775
fn finalize(
@@ -867,6 +778,6 @@ impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost
867778
node: &LogicalPlanLanguage,
868779
state: &CubePlanTopDownState,
869780
) -> CubePlanCost {
870-
CubePlanCost::finalize(&cost, &state.wrapped, &state.limit, node, true)
781+
CubePlanCost::finalize(&cost, &state.wrapped, &state.limit, node)
871782
}
872783
}

rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use crate::{
2222
use datafusion::{
2323
logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner, scalar::ScalarValue,
2424
};
25-
use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason};
25+
use egg::{EGraph, Id, IterationData, Language, Rewrite, Runner, StopReason};
2626
use serde::{Deserialize, Serialize};
2727
use std::{
2828
collections::{HashMap, HashSet},
@@ -329,7 +329,6 @@ impl Rewriter {
329329
auth_context: AuthContextRef,
330330
qtrace: &mut Option<Qtrace>,
331331
span_id: Option<Arc<SpanId>>,
332-
top_down_extractor: bool,
333332
) -> Result<LogicalPlan, CubeError> {
334333
let cube_context = self.cube_context.clone();
335334
let egraph = self.graph.clone();
@@ -349,26 +348,16 @@ impl Rewriter {
349348
let (runner, qtrace_egraph_iterations) =
350349
Self::run_rewrites(&cube_context, egraph, rules, "final")?;
351350

352-
let best = if top_down_extractor {
353-
let mut extractor = TopDownExtractor::new(
354-
&runner.egraph,
355-
BestCubePlan::new(cube_context.meta.clone()),
356-
CubePlanTopDownState::new(),
357-
);
358-
let Some((best_cost, best)) = extractor.find_best(root) else {
359-
return Err(CubeError::internal("Unable to find best plan".to_string()));
360-
};
361-
log::debug!("Best cost: {:#?}", best_cost);
362-
best
363-
} else {
364-
let extractor = Extractor::new(
365-
&runner.egraph,
366-
BestCubePlan::new(cube_context.meta.clone()),
367-
);
368-
let (best_cost, best) = extractor.find_best(root);
369-
log::debug!("Best cost: {:#?}", best_cost);
370-
best
351+
let mut extractor = TopDownExtractor::new(
352+
&runner.egraph,
353+
BestCubePlan::new(cube_context.meta.clone()),
354+
CubePlanTopDownState::new(),
355+
);
356+
let Some((best_cost, best)) = extractor.find_best(root) else {
357+
return Err(CubeError::internal("Unable to find best plan".to_string()));
371358
};
359+
log::debug!("Best cost: {:#?}", best_cost);
360+
372361
let qtrace_best_graph = if Qtrace::is_enabled() {
373362
best.as_ref().iter().cloned().collect()
374363
} else {
@@ -461,12 +450,6 @@ impl Rewriter {
461450
.unwrap_or(true)
462451
}
463452

464-
pub fn top_down_extractor_enabled() -> bool {
465-
env::var("CUBESQL_TOP_DOWN_EXTRACTOR")
466-
.map(|v| v.to_lowercase() != "false")
467-
.unwrap_or(true)
468-
}
469-
470453
pub fn rewrite_rules(
471454
meta_context: Arc<MetaContext>,
472455
config_obj: Arc<dyn ConfigObj>,

rust/cubesql/cubesql/src/config/mod.rs

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,6 @@ pub trait ConfigObj: DIService + Debug {
115115
fn max_sessions(&self) -> usize;
116116

117117
fn no_implicit_order(&self) -> bool;
118-
119-
fn top_down_extractor(&self) -> bool;
120118
}
121119

122120
#[derive(Debug, Clone)]
@@ -137,7 +135,6 @@ pub struct ConfigObjImpl {
137135
pub non_streaming_query_max_row_limit: i32,
138136
pub max_sessions: usize,
139137
pub no_implicit_order: bool,
140-
pub top_down_extractor: bool,
141138
}
142139

143140
impl ConfigObjImpl {
@@ -175,7 +172,6 @@ impl ConfigObjImpl {
175172
non_streaming_query_max_row_limit: env_parse("CUBEJS_DB_QUERY_LIMIT", 50000),
176173
max_sessions: env_parse("CUBEJS_MAX_SESSIONS", 1024),
177174
no_implicit_order: env_parse("CUBESQL_SQL_NO_IMPLICIT_ORDER", true),
178-
top_down_extractor: env_parse("CUBESQL_TOP_DOWN_EXTRACTOR", true),
179175
}
180176
}
181177
}
@@ -242,10 +238,6 @@ impl ConfigObj for ConfigObjImpl {
242238
fn max_sessions(&self) -> usize {
243239
self.max_sessions
244240
}
245-
246-
fn top_down_extractor(&self) -> bool {
247-
self.top_down_extractor
248-
}
249241
}
250242

251243
impl Config {
@@ -278,7 +270,6 @@ impl Config {
278270
non_streaming_query_max_row_limit: 50000,
279271
max_sessions: 1024,
280272
no_implicit_order: true,
281-
top_down_extractor: true,
282273
}),
283274
}
284275
}

0 commit comments

Comments
 (0)