Skip to content

Commit ce1089b

Browse files
committed
enhance: simplify x=x (apache#15387)
- if x is not nullable, x = x -> true; otherwise, x = x -> x IS NOT NULL OR NULL
1 parent 6afd539 commit ce1089b

File tree

3 files changed

+45
-8
lines changed

3 files changed

+45
-8
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,23 @@ impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
760760
None => lit_bool_null(),
761761
})
762762
}
763+
// A = A --> A IS NOT NULL OR NULL (if A is nullable: yields true for non-null A, NULL when A is NULL)
764+
// A = A --> true (if A is not nullable)
765+
Expr::BinaryExpr(BinaryExpr {
766+
left,
767+
op: Eq,
768+
right,
769+
}) if left == right && !left.is_volatile() => { // `&&` short-circuits: is_volatile() is only called when both sides are equal
770+
Transformed::yes(match !info.nullable(&left)? {
771+
true => lit(true),
772+
false => Expr::BinaryExpr(BinaryExpr {
773+
left: Box::new(Expr::IsNotNull(left)),
774+
op: Or,
775+
right: Box::new(lit_bool_null()),
776+
}),
777+
})
778+
}
779+
763780
// Rules for NotEq
764781
//
765782

@@ -2152,6 +2169,17 @@ mod tests {
21522169
}
21532170
}
21542171

2172+
#[test]
2173+
fn test_simplify_eq_not_self() {
2174+
let expr_a = col("c2").eq(col("c2"));
2175+
let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2176+
let expected_a = col("c2").is_not_null().or(lit_bool_null());
2177+
let expected_b = lit(true);
2178+
2179+
assert_eq!(simplify(expr_a), expected_a);
2180+
assert_eq!(simplify(expr_b), expected_b);
2181+
}
2182+
21552183
#[test]
21562184
fn test_simplify_or_true() {
21572185
let expr_a = col("c2").or(lit(true));

datafusion/sqllogictest/test_files/array.slt

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6140,21 +6140,19 @@ logical_plan
61406140
02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
61416141
03)----SubqueryAlias: test
61426142
04)------SubqueryAlias: t
6143-
05)--------Projection:
6144-
06)----------Filter: __common_expr_3 = __common_expr_3
6145-
07)------------Projection: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), Int64(1), Int64(32)) AS __common_expr_3
6146-
08)--------------TableScan: tmp_table projection=[value]
6143+
05)--------Projection:
6144+
06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL)
6145+
07)------------TableScan: tmp_table projection=[value]
61476146
physical_plan
61486147
01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
61496148
02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
61506149
03)----CoalescePartitionsExec
61516150
04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
61526151
05)--------ProjectionExec: expr=[]
61536152
06)----------CoalesceBatchesExec: target_batch_size=8192
6154-
07)------------FilterExec: __common_expr_3@0 = __common_expr_3@0
6155-
08)--------------ProjectionExec: expr=[substr(md5(CAST(value@0 AS Utf8)), 1, 32) as __common_expr_3]
6156-
09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
6157-
10)------------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
6153+
07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IS NOT NULL OR NULL
6154+
08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
6155+
09)----------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
61586156

61596157
# any operator
61606158
query ?

datafusion/sqllogictest/test_files/simplify_expr.slt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,16 @@ query T
6363
select b from t where b !~ '.*'
6464
----
6565

66+
query TT
67+
explain select * from t where a = a;
68+
----
69+
logical_plan
70+
01)Filter: t.a IS NOT NULL OR Boolean(NULL)
71+
02)--TableScan: t projection=[a, b]
72+
physical_plan
73+
01)CoalesceBatchesExec: target_batch_size=8192
74+
02)--FilterExec: a@0 IS NOT NULL OR NULL
75+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
76+
6677
statement ok
6778
drop table t;

0 commit comments

Comments
 (0)