Skip to content

Commit e45516f

Browse files
committed
chore: optimize binary_expr_normalize + align stack to 8mb for testing on macOS
1 parent 53bdc95 commit e45516f

File tree

2 files changed

+89
-52
lines changed

2 files changed

+89
-52
lines changed

rust/cubesql/cubesql/src/compile/engine/df/optimizers/plan_normalize.rs

Lines changed: 45 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,23 +1218,15 @@ fn binary_expr_normalize(
12181218
// and cast the `DATE` to `TIMESTAMP` to match the types.
12191219
match (&left_type, &right_type) {
12201220
(DataType::Timestamp(_, _), DataType::Date32) => {
1221-
let new_right = Box::new(evaluate_expr(
1222-
optimizer,
1223-
right.cast_to(&left_type, schema)?,
1224-
)?);
12251221
return Ok(Box::new(Expr::BinaryExpr {
12261222
left,
12271223
op,
1228-
right: new_right,
1224+
right: evaluate_expr(optimizer, right.cast_to(&left_type, schema)?)?,
12291225
}));
12301226
}
12311227
(DataType::Date32, DataType::Timestamp(_, _)) => {
1232-
let new_left = Box::new(evaluate_expr(
1233-
optimizer,
1234-
left.cast_to(&right_type, schema)?,
1235-
)?);
12361228
return Ok(Box::new(Expr::BinaryExpr {
1237-
left: new_left,
1229+
left: evaluate_expr(optimizer, left.cast_to(&right_type, schema)?)?,
12381230
op,
12391231
right,
12401232
}));
@@ -1256,21 +1248,16 @@ fn binary_expr_normalize(
12561248
};
12571249

12581250
if literal_on_the_left {
1259-
let new_left = Box::new(evaluate_expr(optimizer, left.cast_to(&cast_type, schema)?)?);
12601251
Ok(Box::new(Expr::BinaryExpr {
1261-
left: new_left,
1252+
left: evaluate_expr(optimizer, left.cast_to(&cast_type, schema)?)?,
12621253
op,
12631254
right,
12641255
}))
12651256
} else {
1266-
let new_right = Box::new(evaluate_expr(
1267-
optimizer,
1268-
right.cast_to(&cast_type, schema)?,
1269-
)?);
12701257
Ok(Box::new(Expr::BinaryExpr {
12711258
left,
12721259
op,
1273-
right: new_right,
1260+
right: evaluate_expr(optimizer, right.cast_to(&cast_type, schema)?)?,
12741261
}))
12751262
}
12761263
}
@@ -1357,7 +1344,7 @@ fn in_list_expr_normalize(
13571344
return Ok(list_expr_normalized);
13581345
}
13591346

1360-
evaluate_expr(optimizer, list_expr_normalized.cast_to(&expr_type, schema)?)
1347+
evaluate_expr_stacked(optimizer, list_expr_normalized.cast_to(&expr_type, schema)?)
13611348
})
13621349
.collect::<Result<Vec<_>>>()?;
13631350

@@ -1368,18 +1355,22 @@ fn in_list_expr_normalize(
13681355
}))
13691356
}
13701357

1371-
/// Evaluates an expression to a constant if possible.
1372-
fn evaluate_expr(optimizer: &PlanNormalize, expr: Expr) -> Result<Expr> {
1358+
fn evaluate_expr_stacked(optimizer: &PlanNormalize, expr: Expr) -> Result<Expr> {
13731359
let execution_props = &optimizer.cube_ctx.state.execution_props;
13741360
let mut const_evaluator = ConstEvaluator::new(execution_props);
13751361
expr.rewrite(&mut const_evaluator)
13761362
}
13771363

1364+
/// Evaluates an expression to a constant if possible.
1365+
fn evaluate_expr(optimizer: &PlanNormalize, expr: Expr) -> Result<Box<Expr>> {
1366+
Ok(Box::new(evaluate_expr_stacked(optimizer, expr)?))
1367+
}
1368+
13781369
#[cfg(test)]
13791370
mod tests {
13801371
use super::*;
13811372
use crate::compile::test::{
1382-
get_test_tenant_ctx, rewrite_engine::create_test_postgresql_cube_context,
1373+
get_test_tenant_ctx, rewrite_engine::create_test_postgresql_cube_context, run_async_test,
13831374
};
13841375
use datafusion::{
13851376
arrow::datatypes::{DataType, Field, Schema},
@@ -1402,35 +1393,39 @@ mod tests {
14021393
expr
14031394
}
14041395

1405-
#[tokio::test]
1406-
async fn test_stack_overflow_deeply_nested_or() -> Result<()> {
1407-
let meta = get_test_tenant_ctx();
1408-
let cube_ctx = create_test_postgresql_cube_context(meta)
1409-
.await
1410-
.expect("Failed to create cube context");
1411-
1412-
// Create a simple table
1413-
let schema = Schema::new(vec![
1414-
Field::new("id", DataType::Int32, false),
1415-
Field::new("value", DataType::Int32, true),
1416-
]);
1417-
1418-
let table_scan = LogicalPlanBuilder::scan_empty(Some("test_table"), &schema, None)
1419-
.expect("Failed to create table scan")
1420-
.build()
1421-
.expect("Failed to build plan");
1422-
1423-
// Create a deeply nested OR expression (should cause stack overflow)
1424-
let deeply_nested_filter = create_deeply_nested_or_expr("value", 1_000);
1425-
1426-
let plan = LogicalPlanBuilder::from(table_scan)
1427-
.filter(deeply_nested_filter)
1428-
.expect("Failed to add filter")
1429-
.build()
1430-
.expect("Failed to build plan");
1431-
1432-
let optimizer = PlanNormalize::new(&cube_ctx);
1433-
optimizer.optimize(&plan, &OptimizerConfig::new())?;
1396+
// plan_normalize is recursive, and DataFusion's ExprRewriter is recursive as well
1397+
// let's guard it with a test ensuring that our code, in the dev profile, can rewrite N nodes
1398+
#[test]
1399+
fn test_stack_overflow_deeply_nested_or() -> Result<()> {
1400+
run_async_test(async move {
1401+
let meta = get_test_tenant_ctx();
1402+
let cube_ctx = create_test_postgresql_cube_context(meta)
1403+
.await
1404+
.expect("Failed to create cube context");
1405+
1406+
// Create a simple table
1407+
let schema = Schema::new(vec![
1408+
Field::new("id", DataType::Int32, false),
1409+
Field::new("value", DataType::Int32, true),
1410+
]);
1411+
1412+
let table_scan = LogicalPlanBuilder::scan_empty(Some("test_table"), &schema, None)
1413+
.expect("Failed to create table scan")
1414+
.build()
1415+
.expect("Failed to build plan");
1416+
1417+
// Create a deeply nested OR expression (should cause stack overflow)
1418+
let deeply_nested_filter = create_deeply_nested_or_expr("value", 200);
1419+
1420+
let plan = LogicalPlanBuilder::from(table_scan)
1421+
.filter(deeply_nested_filter)
1422+
.expect("Failed to add filter")
1423+
.build()
1424+
.expect("Failed to build plan");
1425+
1426+
let optimizer = PlanNormalize::new(&cube_ctx);
1427+
optimizer.optimize(&plan, &OptimizerConfig::new()).unwrap();
1428+
});
14341429

14351430
Ok(())
14361431
}

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::{collections::HashMap, env, ops::Deref, sync::Arc};
2-
31
use super::{convert_sql_to_cube_query, CompilationResult, QueryPlan};
42
use crate::{
53
compile::{
@@ -23,6 +21,8 @@ use crate::{
2321
use async_trait::async_trait;
2422
use cubeclient::models::V1CubeMetaType;
2523
use datafusion::{arrow::datatypes::SchemaRef, dataframe::DataFrame as DFDataFrame};
24+
use std::future::Future;
25+
use std::{collections::HashMap, env, ops::Deref, sync::Arc};
2626
use uuid::Uuid;
2727

2828
pub mod rewrite_engine;
@@ -1201,3 +1201,45 @@ pub async fn convert_select_to_query_plan_with_meta(
12011201

12021202
query.unwrap()
12031203
}
1204+
1205+
pub struct AsyncTestOptions {
1206+
stack_bytes: usize,
1207+
}
1208+
1209+
impl Default for AsyncTestOptions {
1210+
fn default() -> Self {
1211+
Self {
1212+
// By default, the tokio stack size matches the OS default:
1213+
// 1 MB on Windows, 2 MB on macOS, 8 MB on Linux.
1214+
// Let's align everything to 8 MB.
1215+
stack_bytes: 8 * 1024 * 1024,
1216+
}
1217+
}
1218+
}
1219+
1220+
pub fn run_async_test<F>(fut: F)
1221+
where
1222+
F: Future<Output = ()> + Send + 'static,
1223+
{
1224+
run_async_test_opt(AsyncTestOptions::default(), fut)
1225+
}
1226+
1227+
pub fn run_async_test_opt<F>(opts: AsyncTestOptions, fut: F)
1228+
where
1229+
F: Future<Output = ()> + Send + 'static,
1230+
{
1231+
std::thread::Builder::new()
1232+
.name("test-rt".into())
1233+
.stack_size(opts.stack_bytes)
1234+
.spawn(|| {
1235+
let rt = tokio::runtime::Builder::new_multi_thread()
1236+
.enable_all()
1237+
.build()
1238+
.unwrap();
1239+
1240+
rt.block_on(fut);
1241+
})
1242+
.unwrap()
1243+
.join()
1244+
.unwrap();
1245+
}

0 commit comments

Comments (0)