Skip to content

Commit 053dc9b

Browse files
committed
Optimize !~ '.*' and '.*' cases to False instead of Eq empty str condition (fixes old behavior)
1 parent a3aa630 commit 053dc9b

File tree

3 files changed

+16
-29
lines changed

3 files changed

+16
-29
lines changed

datafusion/optimizer/src/simplify_expressions/regex.rs

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const ANY_CHAR_REGEX_PATTERN: &str = ".*";
4242
/// - partial anchored regex patterns (e.g. `^foo`) to `LIKE 'foo%'`
4343
/// - combinations (alternatives) of the above, will be concatenated with `OR` or `AND`
4444
/// - `EQ .*` to NotNull
45-
/// - `NE .*` means IS EMPTY
45+
/// - `NE .*` to false (.* matches any string, and NULL !~ results in NULL so NOT match can never be true)
4646
///
4747
/// Dev note: unit tests of this function are in `expr_simplifier.rs`, case `test_simplify_regex`.
4848
pub fn simplify_regex_expr(
@@ -71,13 +71,8 @@ pub fn simplify_regex_expr(
7171
// Handle the special case for ".*" pattern
7272
if pattern == ANY_CHAR_REGEX_PATTERN {
7373
let new_expr = if mode.not {
74-
// not empty
75-
let empty_lit = Box::new(string_scalar.to_expr(""));
76-
Expr::BinaryExpr(BinaryExpr {
77-
left,
78-
op: Operator::Eq,
79-
right: empty_lit,
80-
})
74+
// Always false.
75+
lit(false)
8176
} else {
8277
// not null
8378
left.is_not_null()
@@ -100,13 +95,7 @@ pub fn simplify_regex_expr(
10095
(false, true) => left.is_not_null(),
10196
// not (contains(left, inner))
10297
(true, false) => Expr::Not(Box::new(contains(*left, lit(inner)))),
103-
(true, true) => {
104-
return Ok(Transformed::no(Expr::BinaryExpr(BinaryExpr {
105-
left,
106-
op,
107-
right,
108-
})));
109-
}
98+
(true, true) => lit(false), // "!~ '.*'" is always false.
11099
};
111100
return Ok(Transformed::yes(new_expr));
112101
}

datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -883,17 +883,17 @@ mod tests {
883883
"
884884
)?;
885885

886-
// Test `!= ".*"` transforms to checking if the column is empty
886+
// Test `!~ ".*"` transforms to false (.* matches any string, so NOT match is always false)
887887
let plan = LogicalPlanBuilder::from(table_scan.clone())
888888
.filter(binary_expr(col("a"), Operator::RegexNotMatch, lit(".*")))?
889889
.build()?;
890890

891891
assert_optimized_plan_equal!(
892892
plan,
893-
@ r#"
894-
Filter: test.a = Utf8("")
893+
@ r"
894+
Filter: Boolean(false)
895895
TableScan: test
896-
"#
896+
"
897897
)?;
898898

899899
// Test case-insensitive versions
@@ -911,17 +911,17 @@ mod tests {
911911
"
912912
)?;
913913

914-
// Test `!~ ".*"` (case-insensitive) transforms to checking if the column is empty
914+
// Test `!~* ".*"` (case-insensitive) transforms to false (.* matches any string, so NOT match is always false)
915915
let plan = LogicalPlanBuilder::from(table_scan.clone())
916916
.filter(binary_expr(col("a"), Operator::RegexNotIMatch, lit(".*")))?
917917
.build()?;
918918

919919
assert_optimized_plan_equal!(
920920
plan,
921-
@ r#"
922-
Filter: test.a = Utf8("")
921+
@ r"
922+
Filter: Boolean(false)
923923
TableScan: test
924-
"#
924+
"
925925
)
926926
}
927927

datafusion/sqllogictest/test_files/simplify_expr.slt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,8 @@ physical_plan
4343
query TT
4444
explain select b from t where b !~ '.*'
4545
----
46-
logical_plan
47-
01)Filter: t.b = Utf8View("")
48-
02)--TableScan: t projection=[b]
49-
physical_plan
50-
01)FilterExec: b@0 =
51-
02)--DataSourceExec: partitions=1, partition_sizes=[1]
46+
logical_plan EmptyRelation: rows=0
47+
physical_plan EmptyExec
5248

5349
query T
5450
select b from t where b ~ '.*'
@@ -58,6 +54,7 @@ c
5854

5955
query T
6056
select b from t where b !~ '.*'
57+
----
6158

6259
# test regex .*literal.* simplifies to contains()
6360
query TT
@@ -108,6 +105,7 @@ c
108105

109106
query T
110107
select b from t where b !~ '.*.*'
108+
----
111109

112110
query TT
113111
explain select * from t where a = a;

0 commit comments

Comments
 (0)