Skip to content

Commit 36e3415

Browse files
authored
Optimize replace_params_with_values (#13308)
* Add benchmark for `with_param_values` Target of this benchmark: control that placeholders replacing does not get slower, if the query does not contain placeholders at all. * Do not save original expression name if it does not contain placeholders Prior this patch, the original expression name was unconditionally preserved during the replacement of placeholders. However, in some cases, this is unnecessary. If the expression does not contain any placeholders, its name remains unchanged, and we do not need to preserve it. This patch adds the following optimization: - If during first pass (placeholders type inference) was investigated that there are no placeholders, do not save the original name, do not apply second pass (replacing). Just leave the expression as it is and return `Transformed::no(...)`.
1 parent 31d27c2 commit 36e3415

File tree

4 files changed

+51
-7
lines changed

4 files changed

+51
-7
lines changed

datafusion/core/benches/sql_planner.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ mod data_utils;
2424

2525
use crate::criterion::Criterion;
2626
use arrow::datatypes::{DataType, Field, Fields, Schema};
27+
use criterion::Bencher;
2728
use datafusion::datasource::MemTable;
2829
use datafusion::execution::context::SessionContext;
30+
use datafusion_common::ScalarValue;
2931
use itertools::Itertools;
3032
use std::fs::File;
3133
use std::io::{BufRead, BufReader};
@@ -122,6 +124,29 @@ fn register_clickbench_hits_table() -> SessionContext {
122124
ctx
123125
}
124126

127+
/// Target of this benchmark: control that placeholders replacing does not get slower,
128+
/// if the query does not contain placeholders at all.
129+
fn benchmark_with_param_values_many_columns(ctx: &SessionContext, b: &mut Bencher) {
130+
const COLUMNS_NUM: usize = 200;
131+
let mut aggregates = String::new();
132+
for i in 0..COLUMNS_NUM {
133+
if i > 0 {
134+
aggregates.push_str(", ");
135+
}
136+
aggregates.push_str(format!("MAX(a{})", i).as_str());
137+
}
138+
// SELECT max(attr0), ..., max(attrN) FROM t1.
139+
let query = format!("SELECT {} FROM t1", aggregates);
140+
let statement = ctx.state().sql_to_statement(&query, "Generic").unwrap();
141+
let rt = Runtime::new().unwrap();
142+
let plan =
143+
rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() });
144+
b.iter(|| {
145+
let plan = plan.clone();
146+
criterion::black_box(plan.with_param_values(vec![ScalarValue::from(1)]).unwrap());
147+
});
148+
}
149+
125150
fn criterion_benchmark(c: &mut Criterion) {
126151
// verify that we can load the clickbench data prior to running the benchmark
127152
if !PathBuf::from(format!("{BENCHMARKS_PATH_1}{CLICKBENCH_DATA_PATH}")).exists()
@@ -388,6 +413,10 @@ fn criterion_benchmark(c: &mut Criterion) {
388413
}
389414
})
390415
});
416+
417+
c.bench_function("with_param_values_many_columns", |b| {
418+
benchmark_with_param_values_many_columns(&ctx, b);
419+
});
391420
}
392421

393422
criterion_group!(benches, criterion_benchmark);

datafusion/expr/src/expr.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1598,7 +1598,11 @@ impl Expr {
15981598
///
15991599
/// For example, gicen an expression like `<int32> = $0` will infer `$0` to
16001600
/// have type `int32`.
1601-
pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<Expr> {
1601+
///
1602+
/// Returns transformed expression and flag that is true if expression contains
1603+
/// at least one placeholder.
1604+
pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> {
1605+
let mut has_placeholder = false;
16021606
self.transform(|mut expr| {
16031607
// Default to assuming the arguments are the same type
16041608
if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = &mut expr {
@@ -1615,9 +1619,13 @@ impl Expr {
16151619
rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?;
16161620
rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?;
16171621
}
1622+
if let Expr::Placeholder(_) = &expr {
1623+
has_placeholder = true;
1624+
}
16181625
Ok(Transformed::yes(expr))
16191626
})
16201627
.data()
1628+
.map(|data| (data, has_placeholder))
16211629
}
16221630

16231631
/// Returns true if some of this `exprs` subexpressions may not be evaluated

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,18 +1413,25 @@ impl LogicalPlan {
14131413
let schema = Arc::clone(plan.schema());
14141414
let name_preserver = NamePreserver::new(&plan);
14151415
plan.map_expressions(|e| {
1416-
let original_name = name_preserver.save(&e);
1417-
let transformed_expr =
1418-
e.infer_placeholder_types(&schema)?.transform_up(|e| {
1416+
let (e, has_placeholder) = e.infer_placeholder_types(&schema)?;
1417+
if !has_placeholder {
1418+
// Performance optimization:
1419+
// avoid NamePreserver copy and second pass over expression
1420+
// if no placeholders.
1421+
Ok(Transformed::no(e))
1422+
} else {
1423+
let original_name = name_preserver.save(&e);
1424+
let transformed_expr = e.transform_up(|e| {
14191425
if let Expr::Placeholder(Placeholder { id, .. }) = e {
14201426
let value = param_values.get_placeholders_with_values(&id)?;
14211427
Ok(Transformed::yes(Expr::Literal(value)))
14221428
} else {
14231429
Ok(Transformed::no(e))
14241430
}
14251431
})?;
1426-
// Preserve name to avoid breaking column references to this expression
1427-
Ok(transformed_expr.update_data(|expr| original_name.restore(expr)))
1432+
// Preserve name to avoid breaking column references to this expression
1433+
Ok(transformed_expr.update_data(|expr| original_name.restore(expr)))
1434+
}
14281435
})
14291436
})
14301437
.map(|res| res.data)

datafusion/sql/src/expr/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
140140
let mut expr = self.sql_expr_to_logical_expr(sql, schema, planner_context)?;
141141
expr = self.rewrite_partial_qualifier(expr, schema);
142142
self.validate_schema_satisfies_exprs(schema, &[expr.clone()])?;
143-
let expr = expr.infer_placeholder_types(schema)?;
143+
let (expr, _) = expr.infer_placeholder_types(schema)?;
144144
Ok(expr)
145145
}
146146

0 commit comments

Comments
 (0)