Skip to content

Commit 54d810c

Browse files
authored
Optimize performance bottleneck if projection is large (#56)
1 parent b88a762 commit 54d810c

File tree

1 file changed

+25
-21
lines changed

1 file changed

+25
-21
lines changed

src/rewrite/normal_form.rs

Lines changed: 25 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ impl SpjNormalForm {
263263
source: Arc<dyn TableSource>,
264264
) -> Result<Option<LogicalPlan>> {
265265
log::trace!("rewriting from {qualifier}");
266-
let mut new_output_exprs = vec![];
266+
let mut new_output_exprs = Vec::with_capacity(self.output_exprs.len());
267267
// check that our output exprs are sub-expressions of the other one's output exprs
268268
for (i, output_expr) in self.output_exprs.iter().enumerate() {
269269
let new_output_expr = other
@@ -350,7 +350,11 @@ impl Predicate {
350350
let mut schema = DFSchema::empty();
351351
plan.apply(|plan| {
352352
if let LogicalPlan::TableScan(scan) = plan {
353-
schema = schema.join(&scan.projected_schema)?;
353+
schema = if schema.fields().is_empty() {
354+
(*scan.projected_schema).clone()
355+
} else {
356+
schema.join(&scan.projected_schema)?
357+
}
354358
}
355359

356360
Ok(TreeNodeRecursion::Continue)
@@ -367,14 +371,14 @@ impl Predicate {
367371
// Collect all referenced columns
368372
plan.apply(|plan| {
369373
if let LogicalPlan::TableScan(scan) = plan {
370-
for (i, column) in scan.projected_schema.columns().iter().enumerate() {
374+
for (i, (table_ref, field)) in scan.projected_schema.iter().enumerate() {
375+
let column = Column::new(table_ref.cloned(), field.name());
376+
let data_type = field.data_type();
371377
new.eq_classes
372378
.push(ColumnEquivalenceClass::new_singleton(column.clone()));
373-
new.eq_class_idx_by_column.insert(column.clone(), i);
379+
new.eq_class_idx_by_column.insert(column, i);
374380
new.ranges_by_equivalence_class
375-
.push(Some(Interval::make_unbounded(
376-
scan.projected_schema.data_type(column)?,
377-
)?));
381+
.push(Some(Interval::make_unbounded(data_type)?));
378382
}
379383
}
380384

@@ -954,7 +958,7 @@ mod test {
954958
let ctx = SessionContext::new();
955959

956960
ctx.sql(
957-
"CREATE TABLE t1 AS VALUES
961+
"CREATE TABLE t1 AS VALUES
958962
('2021', 3, 'A'),
959963
('2022', 4, 'B'),
960964
('2023', 5, 'C')",
@@ -1097,31 +1101,31 @@ mod test {
10971101
TestCase {
10981102
name: "example from paper",
10991103
base: "\
1100-
SELECT
1101-
l_orderkey,
1102-
o_custkey,
1104+
SELECT
1105+
l_orderkey,
1106+
o_custkey,
11031107
l_partkey,
11041108
l_shipdate, o_orderdate,
11051109
l_quantity*l_extendedprice AS gross_revenue
11061110
FROM example
11071111
WHERE
1108-
l_orderkey = o_orderkey AND
1109-
l_partkey = p_partkey AND
1110-
p_partkey >= 150 AND
1111-
o_custkey >= 50 AND
1112-
o_custkey <= 500 AND
1112+
l_orderkey = o_orderkey AND
1113+
l_partkey = p_partkey AND
1114+
p_partkey >= 150 AND
1115+
o_custkey >= 50 AND
1116+
o_custkey <= 500 AND
11131117
p_name LIKE '%abc%'
11141118
",
1115-
query: "SELECT
1116-
l_orderkey,
1117-
o_custkey,
1119+
query: "SELECT
1120+
l_orderkey,
1121+
o_custkey,
11181122
l_partkey,
11191123
l_quantity*l_extendedprice
11201124
FROM example
1121-
WHERE
1125+
WHERE
11221126
l_orderkey = o_orderkey AND
11231127
l_partkey = p_partkey AND
1124-
l_partkey >= 150 AND
1128+
l_partkey >= 150 AND
11251129
l_partkey <= 160 AND
11261130
o_custkey = 123 AND
11271131
o_orderdate = l_shipdate AND

0 commit comments

Comments
 (0)