Skip to content

Commit 01c525d

Browse files
ovrigorlukanin
authored and committed
feat(cubesql): PlanNormalize - reduce stack allocations (split by path) (#10067)
1 parent bf06600 commit 01c525d

File tree

1 file changed

+87
-68
lines changed

1 file changed

+87
-68
lines changed

rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs

Lines changed: 87 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -266,18 +266,8 @@ fn plan_normalize(
266266
let on = on
267267
.iter()
268268
.map(|(left_column, right_column)| {
269-
let left_column = column_normalize(
270-
optimizer,
271-
left_column,
272-
remapped_columns,
273-
optimizer_config,
274-
)?;
275-
let right_column = column_normalize(
276-
optimizer,
277-
right_column,
278-
&right_remapped_columns,
279-
optimizer_config,
280-
)?;
269+
let left_column = column_normalize(left_column, remapped_columns)?;
270+
let right_column = column_normalize(right_column, &right_remapped_columns)?;
281271
Ok((left_column, right_column))
282272
})
283273
.collect::<Result<Vec<_>>>()?;
@@ -595,6 +585,7 @@ fn expr_normalize_stacked(
595585
}
596586

597587
/// Recursively normalizes expressions.
588+
#[inline(never)]
598589
fn expr_normalize(
599590
optimizer: &PlanNormalize,
600591
expr: &Expr,
@@ -603,27 +594,34 @@ fn expr_normalize(
603594
optimizer_config: &OptimizerConfig,
604595
) -> Result<Box<Expr>> {
605596
match expr {
597+
e @ Expr::ScalarVariable(..) => Ok(Box::new(e.clone())),
598+
e @ Expr::Literal(..) => Ok(Box::new(e.clone())),
606599
Expr::Alias(expr, alias) => {
607600
let expr = expr_normalize(optimizer, expr, schema, remapped_columns, optimizer_config)?;
608601
let alias = alias.clone();
609602
Ok(Box::new(Expr::Alias(expr, alias)))
610603
}
611-
612604
Expr::OuterColumn(data_type, column) => {
613605
let data_type = data_type.clone();
614-
let column = column_normalize(optimizer, column, remapped_columns, optimizer_config)?;
606+
let column = column_normalize(column, remapped_columns)?;
615607
Ok(Box::new(Expr::OuterColumn(data_type, column)))
616608
}
617-
618609
Expr::Column(column) => {
619-
let column = column_normalize(optimizer, column, remapped_columns, optimizer_config)?;
610+
let column = column_normalize(column, remapped_columns)?;
620611
Ok(Box::new(Expr::Column(column)))
621612
}
613+
Expr::Cast { expr, data_type } => {
614+
let expr = expr_normalize(optimizer, expr, schema, remapped_columns, optimizer_config)?;
615+
let data_type = data_type.clone();
616+
Ok(Box::new(Expr::Cast { expr, data_type }))
617+
}
618+
Expr::TryCast { expr, data_type } => {
619+
let expr = expr_normalize(optimizer, expr, schema, remapped_columns, optimizer_config)?;
620+
let data_type = data_type.clone();
621+
Ok(Box::new(Expr::TryCast { expr, data_type }))
622+
}
622623

623-
e @ Expr::ScalarVariable(..) => Ok(Box::new(e.clone())),
624-
625-
e @ Expr::Literal(..) => Ok(Box::new(e.clone())),
626-
624+
// Deeply nested node, handled on the hot path
627625
Expr::BinaryExpr { left, op, right } => binary_expr_normalize(
628626
optimizer,
629627
left,
@@ -633,6 +631,58 @@ fn expr_normalize(
633631
remapped_columns,
634632
optimizer_config,
635633
),
634+
// Deeply nested node, handled on the hot path
635+
Expr::InList {
636+
expr,
637+
list,
638+
negated,
639+
} => in_list_expr_normalize(
640+
optimizer,
641+
expr,
642+
list,
643+
*negated,
644+
schema,
645+
remapped_columns,
646+
optimizer_config,
647+
),
648+
649+
// See `expr_normalize_cold_path` for an explanation.
650+
other => {
651+
expr_normalize_cold_path(optimizer, other, schema, remapped_columns, optimizer_config)
652+
}
653+
}
654+
}
655+
656+
/// Cold path for expression normalization, handling less common expression variants.
657+
///
658+
/// This function is separated from `expr_normalize` to reduce stack usage in the hot path.
659+
/// When matching on the large `Expr` enum, LLVM pre-allocates stack space for all variants'
660+
/// temporaries in a single function. This results in ~13KB of stack allocations (215 alloca
661+
/// instructions) per call in release mode. By splitting the enum match into hot and cold paths
662+
/// with `#[inline(never)]`, we ensure that common queries only pay the cost of the hot path
663+
/// (~1.5KB with 29 allocations), while rare expression types are handled here.
664+
///
665+
/// This optimization is critical for deeply nested expressions, as it reduces stack usage
666+
/// by ~87% for typical queries, preventing stack overflow on recursive expression trees.
667+
#[inline(never)]
668+
fn expr_normalize_cold_path(
669+
optimizer: &PlanNormalize,
670+
expr: &Expr,
671+
schema: &DFSchema,
672+
remapped_columns: &HashMap<Column, Column>,
673+
optimizer_config: &OptimizerConfig,
674+
) -> Result<Box<Expr>> {
675+
match expr {
676+
// These variants are handled in the hot path (`expr_normalize`) and must never reach this function
677+
Expr::Alias(..) => unreachable!("Alias in a cold path"),
678+
Expr::OuterColumn(..) => unreachable!("OuterColumn in a cold path"),
679+
Expr::Column(..) => unreachable!("Column in a cold path"),
680+
Expr::ScalarVariable(..) => unreachable!("ScalarVariable in a cold path"),
681+
Expr::Literal(..) => unreachable!("Literal in a cold path"),
682+
Expr::BinaryExpr { .. } => unreachable!("BinaryExpr in a cold path"),
683+
Expr::InList { .. } => unreachable!("InList in a cold path"),
684+
Expr::Cast { .. } => unreachable!("Cast in a cold path"),
685+
Expr::TryCast { .. } => unreachable!("TryCast in a cold path"),
636686

637687
Expr::AnyExpr {
638688
left,
@@ -810,18 +860,6 @@ fn expr_normalize(
810860
}))
811861
}
812862

813-
Expr::Cast { expr, data_type } => {
814-
let expr = expr_normalize(optimizer, expr, schema, remapped_columns, optimizer_config)?;
815-
let data_type = data_type.clone();
816-
Ok(Box::new(Expr::Cast { expr, data_type }))
817-
}
818-
819-
Expr::TryCast { expr, data_type } => {
820-
let expr = expr_normalize(optimizer, expr, schema, remapped_columns, optimizer_config)?;
821-
let data_type = data_type.clone();
822-
Ok(Box::new(Expr::TryCast { expr, data_type }))
823-
}
824-
825863
Expr::Sort {
826864
expr,
827865
asc,
@@ -837,18 +875,14 @@ fn expr_normalize(
837875
}))
838876
}
839877

840-
Expr::ScalarFunction { fun, args } => {
841-
let (fun, args) = scalar_function_normalize(
842-
optimizer,
843-
fun,
844-
args,
845-
schema,
846-
remapped_columns,
847-
optimizer_config,
848-
)?;
849-
850-
Ok(Box::new(Expr::ScalarFunction { fun, args }))
851-
}
878+
Expr::ScalarFunction { fun, args } => scalar_function_normalize(
879+
optimizer,
880+
fun,
881+
args,
882+
schema,
883+
remapped_columns,
884+
optimizer_config,
885+
),
852886

853887
Expr::ScalarUDF { fun, args } => {
854888
let fun = Arc::clone(fun);
@@ -1001,20 +1035,6 @@ fn expr_normalize(
10011035
Ok(Box::new(Expr::AggregateUDF { fun, args }))
10021036
}
10031037

1004-
Expr::InList {
1005-
expr,
1006-
list,
1007-
negated,
1008-
} => in_list_expr_normalize(
1009-
optimizer,
1010-
expr,
1011-
list,
1012-
*negated,
1013-
schema,
1014-
remapped_columns,
1015-
optimizer_config,
1016-
),
1017-
10181038
Expr::InSubquery {
10191039
expr,
10201040
subquery,
@@ -1051,12 +1071,8 @@ fn expr_normalize(
10511071
}
10521072

10531073
/// Normalizes columns, taking remapped columns into account.
1054-
fn column_normalize(
1055-
_optimizer: &PlanNormalize,
1056-
column: &Column,
1057-
remapped_columns: &HashMap<Column, Column>,
1058-
_optimizer_config: &OptimizerConfig,
1059-
) -> Result<Column> {
1074+
#[inline(always)]
1075+
fn column_normalize(column: &Column, remapped_columns: &HashMap<Column, Column>) -> Result<Column> {
10601076
if let Some(new_column) = remapped_columns.get(column) {
10611077
return Ok(new_column.clone());
10621078
}
@@ -1073,7 +1089,7 @@ fn scalar_function_normalize(
10731089
schema: &DFSchema,
10741090
remapped_columns: &HashMap<Column, Column>,
10751091
optimizer_config: &OptimizerConfig,
1076-
) -> Result<(BuiltinScalarFunction, Vec<Expr>)> {
1092+
) -> Result<Box<Expr>> {
10771093
let fun = fun.clone();
10781094
let mut args = args
10791095
.iter()
@@ -1099,7 +1115,7 @@ fn scalar_function_normalize(
10991115
}
11001116
}
11011117

1102-
Ok((fun, args))
1118+
Ok(Box::new(Expr::ScalarFunction { fun, args }))
11031119
}
11041120

11051121
/// Recursively normalizes grouping sets.
@@ -1177,6 +1193,7 @@ fn grouping_set_normalize(
11771193
/// - binary operations between a literal string and an expression
11781194
/// of a different type to a string casted to that type
11791195
/// - binary operations between a timestamp and a date to a timestamp and timestamp operation
1196+
#[inline(never)]
11801197
fn binary_expr_normalize(
11811198
optimizer: &PlanNormalize,
11821199
left: &Expr,
@@ -1313,6 +1330,7 @@ fn binary_expr_cast_literal(op: &Operator, other_type: &DataType) -> Option<Data
13131330
/// Currently this includes replacing:
13141331
/// - IN list expressions where expression being tested is `TIMESTAMP`
13151332
/// and values are `DATE` to values casted to `TIMESTAMP`
1333+
#[inline(never)]
13161334
fn in_list_expr_normalize(
13171335
optimizer: &PlanNormalize,
13181336
expr: &Expr,
@@ -1362,6 +1380,7 @@ fn evaluate_expr_stacked(optimizer: &PlanNormalize, expr: Expr) -> Result<Expr>
13621380
}
13631381

13641382
/// Evaluates an expression to a constant if possible.
1383+
#[inline(never)]
13651384
fn evaluate_expr(optimizer: &PlanNormalize, expr: Expr) -> Result<Box<Expr>> {
13661385
Ok(Box::new(evaluate_expr_stacked(optimizer, expr)?))
13671386
}
@@ -1414,8 +1433,8 @@ mod tests {
14141433
.build()
14151434
.expect("Failed to build plan");
14161435

1417-
// Create a deeply nested OR expression (should cause stack overflow)
1418-
let deeply_nested_filter = create_deeply_nested_or_expr("value", 200);
1436+
// Create a deeply nested OR expression
1437+
let deeply_nested_filter = create_deeply_nested_or_expr("value", 500);
14191438

14201439
let plan = LogicalPlanBuilder::from(table_scan)
14211440
.filter(deeply_nested_filter)

0 commit comments

Comments
 (0)