Skip to content

Commit 44906b2

Browse files
committed
enhance: simplify x=x (apache#15387)
- if x is not nullable, x = x -> true; otherwise, x = x -> x IS NOT NULL OR NULL
1 parent 6afd539 commit 44906b2

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,25 @@ impl<S: SimplifyInfo> TreeNodeRewriter for Simplifier<'_, S> {
760760
None => lit_bool_null(),
761761
})
762762
}
763+
// According to SQL's null semantics, NULL = NULL evaluates to NULL
764+
// When both sides are the same expression (A = A) and A is a non-volatile expression:
765+
// A = A --> A IS NOT NULL OR NULL
766+
// A = A --> true (if A is not nullable)
767+
Expr::BinaryExpr(BinaryExpr {
768+
left,
769+
op: Eq,
770+
right,
771+
}) if (left == right) & !left.is_volatile() => {
772+
Transformed::yes(match !info.nullable(&left)? {
773+
true => lit(true),
774+
false => Expr::BinaryExpr(BinaryExpr {
775+
left: Box::new(Expr::IsNotNull(left)),
776+
op: Or,
777+
right: Box::new(lit_bool_null()),
778+
}),
779+
})
780+
}
781+
763782
// Rules for NotEq
764783
//
765784

@@ -2152,6 +2171,21 @@ mod tests {
21522171
}
21532172
}
21542173

2174+
#[test]
2175+
fn test_simplify_eq_not_self() {
2176+
// Nullable case: `c2` is nullable, so `c2 = c2` must simplify to `c2 IS NOT NULL OR NULL`.
2177+
// That form is TRUE when `c2` is not NULL and NULL otherwise, matching SQL's three-valued logic for `c2 = c2`.
2178+
let expr_a = col("c2").eq(col("c2"));
2179+
let expected_a = col("c2").is_not_null().or(lit_bool_null());
2180+
2181+
// Non-nullable case: `c2_non_null` can never be NULL, so `c2_non_null = c2_non_null` folds to the literal `true`.
2182+
let expr_b = col("c2_non_null").eq(col("c2_non_null"));
2183+
let expected_b = lit(true);
2184+
2185+
assert_eq!(simplify(expr_a), expected_a);
2186+
assert_eq!(simplify(expr_b), expected_b);
2187+
}
2188+
21552189
#[test]
21562190
fn test_simplify_or_true() {
21572191
let expr_a = col("c2").or(lit(true));

datafusion/sqllogictest/test_files/array.slt

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6140,21 +6140,19 @@ logical_plan
61406140
02)--Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
61416141
03)----SubqueryAlias: test
61426142
04)------SubqueryAlias: t
6143-
05)--------Projection:
6144-
06)----------Filter: __common_expr_3 = __common_expr_3
6145-
07)------------Projection: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), Int64(1), Int64(32)) AS __common_expr_3
6146-
08)--------------TableScan: tmp_table projection=[value]
6143+
05)--------Projection:
6144+
06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL)
6145+
07)------------TableScan: tmp_table projection=[value]
61476146
physical_plan
61486147
01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
61496148
02)--AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))]
61506149
03)----CoalescePartitionsExec
61516150
04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
61526151
05)--------ProjectionExec: expr=[]
61536152
06)----------CoalesceBatchesExec: target_batch_size=8192
6154-
07)------------FilterExec: __common_expr_3@0 = __common_expr_3@0
6155-
08)--------------ProjectionExec: expr=[substr(md5(CAST(value@0 AS Utf8)), 1, 32) as __common_expr_3]
6156-
09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
6157-
10)------------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
6153+
07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IS NOT NULL OR NULL
6154+
08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
6155+
09)----------------LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
61586156

61596157
# any operator
61606158
query ?

datafusion/sqllogictest/test_files/simplify_expr.slt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,16 @@ query T
6363
select b from t where b !~ '.*'
6464
----
6565

66+
query TT
67+
explain select * from t where a = a;
68+
----
69+
logical_plan
70+
01)Filter: t.a IS NOT NULL OR Boolean(NULL)
71+
02)--TableScan: t projection=[a, b]
72+
physical_plan
73+
01)CoalesceBatchesExec: target_batch_size=8192
74+
02)--FilterExec: a@0 IS NOT NULL OR NULL
75+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
76+
6677
statement ok
6778
drop table t;

0 commit comments

Comments
 (0)