Skip to content

Commit 0c7ab7c

Browse files
committed
feat(cubesql): Remove bottom-up extraction completely
1 parent 6ea1817 commit 0c7ab7c

File tree

6 files changed

+22 additions, -176 deletions

.github/workflows/rust-cubesql.yml

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,7 @@ jobs:
6262
# We use host instead of cross container, because it's much faster
6363
runs-on: ubuntu-24.04
6464
timeout-minutes: 60
65-
name: Unit (Rewrite Engine) (CUBESQL_TOP_DOWN_EXTRACTOR=${{ matrix.top-down-extractor }})
66-
strategy:
67-
matrix:
68-
top-down-extractor: ['true', 'false']
69-
fail-fast: false
65+
name: Unit (Rewrite Engine)
7066

7167
steps:
7268
- name: Checkout
@@ -94,7 +90,6 @@ jobs:
9490
CUBESQL_TESTING_CUBE_TOKEN: ${{ secrets.CUBESQL_TESTING_CUBE_TOKEN }}
9591
CUBESQL_TESTING_CUBE_URL: ${{ secrets.CUBESQL_TESTING_CUBE_URL }}
9692
CUBESQL_SQL_PUSH_DOWN: true
97-
CUBESQL_TOP_DOWN_EXTRACTOR: ${{ matrix.top-down-extractor }}
9893
CUBESQL_REWRITE_CACHE: true
9994
CUBESQL_REWRITE_TIMEOUT: 60
10095
run: |

rust/cubesql/cubesql/src/compile/mod.rs

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -13920,11 +13920,7 @@ ORDER BY "source"."str0" ASC
1392013920
// CAST(CAST(ta_1.order_date AS Date32) - CAST(CAST(Utf8("1970-01-01") AS Date32) AS Date32) + Int64(3) AS Decimal(38, 10))
1392113921
if Rewriter::sql_push_down_enabled() {
1392213922
let sql = logical_plan.find_cube_scan_wrapped_sql().wrapped_sql.sql;
13923-
if Rewriter::top_down_extractor_enabled() {
13924-
assert!(sql.contains("LIMIT 1000"));
13925-
} else {
13926-
assert!(sql.contains("\"limit\": 1000"));
13927-
}
13923+
assert!(sql.contains("LIMIT 1000"));
1392813924
assert!(sql.contains("% 7"));
1392913925

1393013926
let physical_plan = query_plan.as_physical_plan().await.unwrap();
@@ -16019,18 +16015,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
1601916015
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
1602016016
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
1602116017
granularity: Some("month".to_string()),
16022-
date_range: if Rewriter::top_down_extractor_enabled() {
16023-
Some(json!(vec![
16024-
"2019-01-01T00:00:00.000Z".to_string(),
16025-
"2019-01-31T23:59:59.999Z".to_string()
16026-
]))
16027-
} else {
16028-
// Non-optimal variant with top down extractor disabled
16029-
Some(json!(vec![
16030-
"2019-01-01 00:00:00.000".to_string(),
16031-
"2019-01-31 23:59:59.999".to_string()
16032-
]))
16033-
}
16018+
date_range: Some(json!(vec![
16019+
"2019-01-01T00:00:00.000Z".to_string(),
16020+
"2019-01-31T23:59:59.999Z".to_string()
16021+
]))
1603416022
}]),
1603516023
order: Some(vec![]),
1603616024
..Default::default()

rust/cubesql/cubesql/src/compile/query_engine.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,6 @@ pub trait QueryEngine {
226226
state.auth_context().unwrap(),
227227
qtrace,
228228
span_id.clone(),
229-
self.config_ref().top_down_extractor(),
230229
)
231230
.await
232231
.map_err(|e| match e.cause {

rust/cubesql/cubesql/src/compile/rewrite/cost.rs

Lines changed: 6 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ use crate::{
88
},
99
transport::{MetaContext, V1CubeMetaDimensionExt},
1010
};
11-
use egg::{Analysis, CostFunction, EGraph, Id, Language, RecExpr};
11+
use egg::{Analysis, EGraph, Id, Language, RecExpr};
1212
use indexmap::IndexSet;
1313

1414
#[derive(Debug)]
@@ -21,7 +21,7 @@ impl BestCubePlan {
2121
Self { meta_context }
2222
}
2323

24-
pub fn initial_cost(&self, enode: &LogicalPlanLanguage, top_down: bool) -> CubePlanCost {
24+
pub fn initial_cost(&self, enode: &LogicalPlanLanguage) -> CubePlanCost {
2525
let table_scans = match enode {
2626
LogicalPlanLanguage::TableScan(_) => 1,
2727
_ => 0,
@@ -48,8 +48,7 @@ impl BestCubePlan {
4848
};
4949

5050
let non_pushed_down_limit_sort = match enode {
51-
LogicalPlanLanguage::Limit(_) if !top_down => 1,
52-
LogicalPlanLanguage::Sort(_) if top_down => 1,
51+
LogicalPlanLanguage::Sort(_) => 1,
5352
_ => 0,
5453
};
5554

@@ -297,73 +296,13 @@ pub enum CubePlanState {
297296
Wrapper,
298297
}
299298

300-
impl CubePlanState {
301-
pub fn add_child(&self, other: &Self) -> Self {
302-
match (self, other) {
303-
(CubePlanState::Wrapper, _) => CubePlanState::Wrapper,
304-
(_, CubePlanState::Wrapped) => CubePlanState::Wrapped,
305-
(CubePlanState::Wrapped, _) => CubePlanState::Wrapped,
306-
(CubePlanState::Unwrapped(a), _) => CubePlanState::Unwrapped(*a),
307-
}
308-
}
309-
}
310-
311299
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
312300
pub enum SortState {
313301
None,
314302
Current,
315303
DirectChild,
316304
}
317305

318-
impl SortState {
319-
pub fn add_child(&self, other: &Self) -> Self {
320-
match (self, other) {
321-
(Self::Current, _) => Self::Current,
322-
(_, Self::Current) | (Self::DirectChild, _) => Self::DirectChild,
323-
_ => Self::None,
324-
}
325-
}
326-
}
327-
328-
#[derive(Debug, Clone, Eq, PartialEq)]
329-
pub struct CubePlanCostAndState {
330-
pub cost: CubePlanCost,
331-
pub state: CubePlanState,
332-
pub sort_state: SortState,
333-
}
334-
335-
impl PartialOrd for CubePlanCostAndState {
336-
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
337-
Some(self.cost.cmp(&other.cost))
338-
}
339-
}
340-
341-
impl Ord for CubePlanCostAndState {
342-
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
343-
self.cost.cmp(&other.cost)
344-
}
345-
}
346-
347-
impl CubePlanCostAndState {
348-
pub fn add_child(&self, other: &Self) -> Self {
349-
Self {
350-
cost: self.cost.add_child(&other.cost),
351-
state: self.state.add_child(&other.state),
352-
sort_state: self.sort_state.add_child(&other.sort_state),
353-
}
354-
}
355-
356-
pub fn finalize(&self, enode: &LogicalPlanLanguage) -> Self {
357-
Self {
358-
cost: self
359-
.cost
360-
.finalize(&self.state, &self.sort_state, enode, false),
361-
state: self.state.clone(),
362-
sort_state: self.sort_state.clone(),
363-
}
364-
}
365-
}
366-
367306
impl CubePlanCost {
368307
pub fn add_child(&self, other: &Self) -> Self {
369308
Self {
@@ -419,7 +358,6 @@ impl CubePlanCost {
419358
state: &CubePlanState,
420359
sort_state: &SortState,
421360
enode: &LogicalPlanLanguage,
422-
top_down: bool,
423361
) -> Self {
424362
Self {
425363
replacers: self.replacers,
@@ -440,7 +378,7 @@ impl CubePlanCost {
440378
},
441379
non_pushed_down_limit_sort: match sort_state {
442380
SortState::DirectChild => self.non_pushed_down_limit_sort,
443-
SortState::Current if top_down => self.non_pushed_down_limit_sort,
381+
SortState::Current => self.non_pushed_down_limit_sort,
444382
_ => 0,
445383
},
446384
// Don't track state here: we want representations that have fewer wrappers with zero members _in total_
@@ -495,54 +433,6 @@ impl CubePlanCost {
495433
}
496434
}
497435

498-
impl CostFunction<LogicalPlanLanguage> for BestCubePlan {
499-
type Cost = CubePlanCostAndState;
500-
fn cost<C>(&mut self, enode: &LogicalPlanLanguage, mut costs: C) -> Self::Cost
501-
where
502-
C: FnMut(Id) -> Self::Cost,
503-
{
504-
let ast_size_outside_wrapper = match enode {
505-
LogicalPlanLanguage::Aggregate(_) => 1,
506-
LogicalPlanLanguage::Projection(_) => 1,
507-
LogicalPlanLanguage::Limit(_) => 1,
508-
LogicalPlanLanguage::Sort(_) => 1,
509-
LogicalPlanLanguage::Filter(_) => 1,
510-
LogicalPlanLanguage::Join(_) => 1,
511-
LogicalPlanLanguage::CrossJoin(_) => 1,
512-
LogicalPlanLanguage::Union(_) => 1,
513-
LogicalPlanLanguage::Window(_) => 1,
514-
LogicalPlanLanguage::Subquery(_) => 1,
515-
LogicalPlanLanguage::Distinct(_) => 1,
516-
_ => 0,
517-
};
518-
519-
let cost = self.initial_cost(enode, false);
520-
let initial_cost = CubePlanCostAndState {
521-
cost,
522-
state: match enode {
523-
LogicalPlanLanguage::CubeScanWrapped(CubeScanWrapped(true)) => {
524-
CubePlanState::Wrapped
525-
}
526-
LogicalPlanLanguage::CubeScanWrapper(_) => CubePlanState::Wrapper,
527-
_ => CubePlanState::Unwrapped(ast_size_outside_wrapper),
528-
},
529-
sort_state: match enode {
530-
LogicalPlanLanguage::Sort(_) => SortState::Current,
531-
_ => SortState::None,
532-
},
533-
};
534-
let res = enode
535-
.children()
536-
.iter()
537-
.fold(initial_cost.clone(), |cost, id| {
538-
let child = costs(*id);
539-
cost.add_child(&child)
540-
})
541-
.finalize(enode);
542-
res
543-
}
544-
}
545-
546436
pub trait TopDownCost: Clone + Debug + PartialOrd {
547437
fn add(&self, other: &Self) -> Self;
548438
}
@@ -871,7 +761,7 @@ impl TopDownState<LogicalPlanLanguage> for CubePlanTopDownState {
871761

872762
impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost> for BestCubePlan {
873763
fn cost(&self, node: &LogicalPlanLanguage) -> CubePlanCost {
874-
self.initial_cost(node, true)
764+
self.initial_cost(node)
875765
}
876766

877767
fn finalize(
@@ -880,6 +770,6 @@ impl TopDownCostFunction<LogicalPlanLanguage, CubePlanTopDownState, CubePlanCost
880770
node: &LogicalPlanLanguage,
881771
state: &CubePlanTopDownState,
882772
) -> CubePlanCost {
883-
CubePlanCost::finalize(&cost, &state.wrapped, &state.limit, node, true)
773+
CubePlanCost::finalize(&cost, &state.wrapped, &state.limit, node)
884774
}
885775
}

rust/cubesql/cubesql/src/compile/rewrite/rewriter.rs

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use crate::{
2222
use datafusion::{
2323
logical_plan::LogicalPlan, physical_plan::planner::DefaultPhysicalPlanner, scalar::ScalarValue,
2424
};
25-
use egg::{EGraph, Extractor, Id, IterationData, Language, Rewrite, Runner, StopReason};
25+
use egg::{EGraph, Id, IterationData, Language, Rewrite, Runner, StopReason};
2626
use serde::{Deserialize, Serialize};
2727
use std::{
2828
collections::{HashMap, HashSet},
@@ -329,7 +329,6 @@ impl Rewriter {
329329
auth_context: AuthContextRef,
330330
qtrace: &mut Option<Qtrace>,
331331
span_id: Option<Arc<SpanId>>,
332-
top_down_extractor: bool,
333332
) -> Result<LogicalPlan, CubeError> {
334333
let cube_context = self.cube_context.clone();
335334
let egraph = self.graph.clone();
@@ -349,26 +348,16 @@ impl Rewriter {
349348
let (runner, qtrace_egraph_iterations) =
350349
Self::run_rewrites(&cube_context, egraph, rules, "final")?;
351350

352-
let best = if top_down_extractor {
353-
let mut extractor = TopDownExtractor::new(
354-
&runner.egraph,
355-
BestCubePlan::new(cube_context.meta.clone()),
356-
CubePlanTopDownState::new(),
357-
);
358-
let Some((best_cost, best)) = extractor.find_best(root) else {
359-
return Err(CubeError::internal("Unable to find best plan".to_string()));
360-
};
361-
log::debug!("Best cost: {:#?}", best_cost);
362-
best
363-
} else {
364-
let extractor = Extractor::new(
365-
&runner.egraph,
366-
BestCubePlan::new(cube_context.meta.clone()),
367-
);
368-
let (best_cost, best) = extractor.find_best(root);
369-
log::debug!("Best cost: {:#?}", best_cost);
370-
best
351+
let mut extractor = TopDownExtractor::new(
352+
&runner.egraph,
353+
BestCubePlan::new(cube_context.meta.clone()),
354+
CubePlanTopDownState::new(),
355+
);
356+
let Some((best_cost, best)) = extractor.find_best(root) else {
357+
return Err(CubeError::internal("Unable to find best plan".to_string()));
371358
};
359+
log::debug!("Best cost: {:#?}", best_cost);
360+
372361
let qtrace_best_graph = if Qtrace::is_enabled() {
373362
best.as_ref().to_vec()
374363
} else {
@@ -461,12 +450,6 @@ impl Rewriter {
461450
.unwrap_or(true)
462451
}
463452

464-
pub fn top_down_extractor_enabled() -> bool {
465-
env::var("CUBESQL_TOP_DOWN_EXTRACTOR")
466-
.map(|v| v.to_lowercase() != "false")
467-
.unwrap_or(true)
468-
}
469-
470453
pub fn rewrite_rules(
471454
meta_context: Arc<MetaContext>,
472455
config_obj: Arc<dyn ConfigObj>,

rust/cubesql/cubesql/src/config/mod.rs

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,6 @@ pub trait ConfigObj: DIService + Debug {
115115
fn max_sessions(&self) -> usize;
116116

117117
fn no_implicit_order(&self) -> bool;
118-
119-
fn top_down_extractor(&self) -> bool;
120118
}
121119

122120
#[derive(Debug, Clone)]
@@ -137,7 +135,6 @@ pub struct ConfigObjImpl {
137135
pub non_streaming_query_max_row_limit: i32,
138136
pub max_sessions: usize,
139137
pub no_implicit_order: bool,
140-
pub top_down_extractor: bool,
141138
}
142139

143140
impl ConfigObjImpl {
@@ -175,7 +172,6 @@ impl ConfigObjImpl {
175172
non_streaming_query_max_row_limit: env_parse("CUBEJS_DB_QUERY_LIMIT", 50000),
176173
max_sessions: env_parse("CUBEJS_MAX_SESSIONS", 1024),
177174
no_implicit_order: env_parse("CUBESQL_SQL_NO_IMPLICIT_ORDER", true),
178-
top_down_extractor: env_parse("CUBESQL_TOP_DOWN_EXTRACTOR", true),
179175
}
180176
}
181177
}
@@ -242,10 +238,6 @@ impl ConfigObj for ConfigObjImpl {
242238
fn max_sessions(&self) -> usize {
243239
self.max_sessions
244240
}
245-
246-
fn top_down_extractor(&self) -> bool {
247-
self.top_down_extractor
248-
}
249241
}
250242

251243
impl Config {
@@ -278,7 +270,6 @@ impl Config {
278270
non_streaming_query_max_row_limit: 50000,
279271
max_sessions: 1024,
280272
no_implicit_order: true,
281-
top_down_extractor: true,
282273
}),
283274
}
284275
}

0 commit comments

Comments
 (0)