Skip to content

Commit 631cbac

Browse files
Integrate LIKE/RLIKE LIST with ReplaceStringCasingWithInsensitiveRegexMatch rule (elastic#131531)
Allow LIKE LIST and RLIKE LIST to take advantage of the ReplaceStringCasingWithInsensitiveRegexMatch optimizer rule. Add unit tests to confirm that the change works and that the results are correct. For example, the query `FROM employees | WHERE TO_UPPER(first_name) RLIKE ("G.*", "a.*", "bE.*")` is pushed down as just a case-insensitive `first_name RLIKE ("G.*")`, because the other patterns contain lowercase letters and are dropped for an upper-cased field. Likewise, the query `FROM employees | WHERE TO_UPPER(TO_LOWER(TO_UPPER(first_name))) LIKE ("G.*", "a.*", "bE.*")` is pushed down as just a case-insensitive `first_name LIKE ("G.*")`.
1 parent 81606a3 commit 631cbac

File tree

9 files changed

+324
-26
lines changed

9 files changed

+324
-26
lines changed

docs/changelog/131531.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 131531
2+
summary: Integrate LIKE/RLIKE LIST with `ReplaceStringCasingWithInsensitiveRegexMatch`
3+
rule
4+
area: ES|QL
5+
type: enhancement
6+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RegexMatch.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.xpack.esql.core.type.DataType;
1616

1717
import java.util.Objects;
18+
import java.util.function.Predicate;
1819

1920
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
2021
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isStringAndExact;
@@ -67,6 +68,14 @@ public Boolean fold(FoldContext ctx) {
6768
throw new UnsupportedOperationException();
6869
}
6970

71+
/**
72+
* Returns an equivalent optimized expression, taking into account the case of the pattern(s).
73+
* @param unwrappedField the field with the to_upper/to_lower function(s) removed
74+
* @param matchesCaseFn a predicate that checks whether a pattern string matches the case
*                      produced by the removed to_upper/to_lower function
75+
* @return an optimized equivalent Expression, or this if no optimization is possible
76+
*/
77+
public abstract Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn);
78+
7079
@Override
7180
public boolean equals(Object obj) {
7281
if (super.equals(obj)) {

x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,114 @@ emp_no:integer | first_name:keyword
534534
10055 | Georgy
535535
;
536536

537+
likeListWithUpperAllLower
538+
required_capability: like_with_list_of_patterns
539+
FROM employees
540+
| WHERE TO_UPPER(first_name) LIKE ("geor*", "wei*")
541+
| KEEP emp_no, first_name
542+
| SORT emp_no;
543+
544+
emp_no:integer | first_name:keyword
545+
;
546+
547+
likeListWithUpperAllUpper
548+
required_capability: like_with_list_of_patterns
549+
FROM employees
550+
| WHERE TO_UPPER(first_name) LIKE ("GEOR*", "WEI*")
551+
| KEEP emp_no, first_name
552+
| SORT emp_no;
553+
554+
emp_no:integer | first_name:keyword
555+
10001 | Georgi
556+
10040 | Weiyi
557+
10055 | Georgy
558+
;
559+
560+
likeListWithUpperMixedCase
561+
required_capability: like_with_list_of_patterns
562+
FROM employees
563+
| WHERE TO_UPPER(first_name) LIKE ("GeOr*", "wEiY*", "bErNi")
564+
| KEEP emp_no, first_name
565+
| SORT emp_no;
566+
567+
emp_no:integer | first_name:keyword
568+
;
569+
570+
likeListWithUpperMultiplePatternsMixedCase
571+
required_capability: like_with_list_of_patterns
572+
FROM employees
573+
| WHERE TO_UPPER(first_name) LIKE ("geor*", "WEIYI*", "bErnI*")
574+
| KEEP emp_no, first_name
575+
| SORT emp_no;
576+
577+
emp_no:integer | first_name:keyword
578+
10040 | Weiyi
579+
;
580+
581+
likeListWithUpperNoMatch
582+
required_capability: like_with_list_of_patterns
583+
FROM employees
584+
| WHERE TO_UPPER(first_name) LIKE ("notaname*")
585+
| KEEP emp_no, first_name
586+
| SORT emp_no;
587+
588+
emp_no:integer | first_name:keyword
589+
;
590+
591+
rlikeListWithUpperAllLower
592+
required_capability: rlike_with_list_of_patterns
593+
FROM employees
594+
| WHERE TO_UPPER(first_name) RLIKE ("geor.*", "wei.*")
595+
| KEEP emp_no, first_name
596+
| SORT emp_no;
597+
598+
emp_no:integer | first_name:keyword
599+
;
600+
601+
rlikeListWithUpperAllUpper
602+
required_capability: rlike_with_list_of_patterns
603+
FROM employees
604+
| WHERE TO_UPPER(first_name) RLIKE ("GEOR.*", "WEI.*")
605+
| KEEP emp_no, first_name
606+
| SORT emp_no;
607+
608+
emp_no:integer | first_name:keyword
609+
10001 | Georgi
610+
10040 | Weiyi
611+
10055 | Georgy
612+
;
613+
614+
rlikeListWithUpperMixedCase
615+
required_capability: rlike_with_list_of_patterns
616+
FROM employees
617+
| WHERE TO_UPPER(first_name) RLIKE ("GeOr.*", "wEiY.*", "bErNi")
618+
| KEEP emp_no, first_name
619+
| SORT emp_no;
620+
621+
emp_no:integer | first_name:keyword
622+
;
623+
624+
rlikeListWithUpperMultiplePatternsMixedCase
625+
required_capability: rlike_with_list_of_patterns
626+
FROM employees
627+
| WHERE TO_UPPER(first_name) RLIKE ("geor*", "WEIYI.*", "bErnI.*")
628+
| KEEP emp_no, first_name
629+
| SORT emp_no;
630+
631+
emp_no:integer | first_name:keyword
632+
10040 | Weiyi
633+
;
634+
635+
rlikeListWithUpperNoMatch
636+
required_capability: rlike_with_list_of_patterns
637+
FROM employees
638+
| WHERE TO_UPPER(first_name) RLIKE ("notaname.*")
639+
| KEEP emp_no, first_name
640+
| SORT emp_no;
641+
642+
emp_no:integer | first_name:keyword
643+
;
644+
537645
rlikeListEmptyArgWildcard
538646
required_capability: rlike_with_list_of_patterns
539647
FROM employees
@@ -1336,3 +1444,26 @@ ROW x = "abc" | EVAL bool = x RLIKE "#"
13361444
x:keyword | bool:boolean
13371445
abc | false
13381446
;
1447+
1448+
rlikeWithLowerTurnedInsensitiveUnicode#[skip:-8.12.99]
1449+
FROM airport_city_boundaries
1450+
| WHERE TO_UPPER(region) RLIKE ".*Л.*" and abbrev == "FRU"
1451+
| KEEP region
1452+
| LIMIT 1
1453+
;
1454+
1455+
region:text
1456+
Свердлов району
1457+
;
1458+
1459+
rlikeListWithLowerTurnedInsensitiveUnicode
1460+
required_capability: rlike_with_list_of_patterns
1461+
FROM airport_city_boundaries
1462+
| WHERE TO_UPPER(region) RLIKE (".*Л.*", ".*NOT EXISTS.*") and abbrev == "FRU"
1463+
| KEEP region
1464+
| LIMIT 1
1465+
;
1466+
1467+
region:text
1468+
Свердлов району
1469+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/RLike.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.xpack.esql.core.expression.Expression;
14+
import org.elasticsearch.xpack.esql.core.expression.Literal;
1415
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePattern;
1516
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
1617
import org.elasticsearch.xpack.esql.core.querydsl.query.RegexQuery;
@@ -24,6 +25,7 @@
2425
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2526

2627
import java.io.IOException;
28+
import java.util.function.Predicate;
2729

2830
public class RLike extends RegexMatch<RLikePattern> {
2931
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "RLike", RLike::new);
@@ -108,4 +110,16 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
108110
// TODO: see whether escaping is needed
109111
return new RegexQuery(source(), handler.nameOf(fa.exactAttribute()), pattern().asJavaRegex(), caseInsensitive());
110112
}
113+
114+
/**
115+
* Pushes down string casing optimization for a single pattern using the provided predicate.
116+
* Returns a new RLike with case insensitivity or a Literal.FALSE if not matched.
117+
*
* @param unwrappedField the field expression the new RLike is built on, in place of this node's field
* @param matchesCaseFn predicate applied to this node's pattern string
* @return a FALSE literal when the predicate rejects the pattern; otherwise a new RLike over
*         {@code unwrappedField} that keeps this node's source and pattern
*/
118+
@Override
119+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
120+
// The predicate rejected the pattern's casing: replace the whole match with a FALSE literal.
if (matchesCaseFn.test(pattern().pattern()) == false) {
121+
return Literal.of(this, Boolean.FALSE);
122+
}
123+
// Same source and pattern, but on the unwrapped field and with the trailing flag set to true.
return new RLike(source(), unwrappedField, pattern(), true);
124+
}
111125
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/RLikeList.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.index.query.SearchExecutionContext;
1818
import org.elasticsearch.xpack.esql.core.expression.Expression;
1919
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
20+
import org.elasticsearch.xpack.esql.core.expression.Literal;
2021
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePattern;
2122
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePatternList;
2223
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
@@ -29,6 +30,8 @@
2930
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
3031

3132
import java.io.IOException;
33+
import java.util.List;
34+
import java.util.function.Predicate;
3235
import java.util.function.Supplier;
3336
import java.util.stream.Collectors;
3437

@@ -145,6 +148,23 @@ public org.apache.lucene.search.Query asLuceneQuery(
145148
);
146149
}
147150

151+
/**
152+
* Pushes down string casing optimization by filtering patterns using the provided predicate.
153+
* Returns a new RegexMatch or a Literal.FALSE if none match.
154+
*
* @param unwrappedField the field expression the new RLikeList is built on, in place of this node's field
* @param matchesCaseFn predicate applied to each pattern string in this node's pattern list
* @return a FALSE literal when the predicate rejects every pattern; otherwise a new RLikeList over
*         {@code unwrappedField} holding only the accepted patterns, with the case-insensitive flag set
*/
155+
@Override
156+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
157+
// Keep only the patterns the predicate accepts; the rejected ones are dropped from the list.
List<RLikePattern> filtered = pattern().patternList()
158+
.stream()
159+
.filter(p -> matchesCaseFn.test(p.pattern()))
160+
.collect(Collectors.toList());
161+
// none of the patterns matches the case of the field, return false
162+
if (filtered.isEmpty()) {
163+
return Literal.of(this, Boolean.FALSE);
164+
}
165+
// Rebuild the list match on the unwrapped field with the surviving patterns only.
return new RLikeList(source(), unwrappedField, new RLikePatternList(filtered), true);
166+
}
167+
148168
@Override
149169
protected NodeInfo<? extends Expression> info() {
150170
return NodeInfo.create(this, RLikeList::new, field(), pattern(), caseInsensitive());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLike.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.xpack.esql.core.expression.Expression;
1414
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
15+
import org.elasticsearch.xpack.esql.core.expression.Literal;
1516
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
1617
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
1718
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
@@ -25,6 +26,7 @@
2526
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2627

2728
import java.io.IOException;
29+
import java.util.function.Predicate;
2830

2931
public class WildcardLike extends RegexMatch<WildcardPattern> {
3032
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
@@ -132,4 +134,16 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
132134
private Query translateField(String targetFieldName, boolean forceStringMatch) {
133135
return new WildcardQuery(source(), targetFieldName, pattern().asLuceneWildcard(), caseInsensitive(), forceStringMatch);
134136
}
137+
138+
/**
139+
* Pushes down string casing optimization for a single pattern using the provided predicate.
140+
* Returns a new WildcardLike with case insensitivity or a Literal.FALSE if not matched.
141+
*
* @param unwrappedField the field expression the new WildcardLike is built on, in place of this node's field
* @param matchesCaseFn predicate applied to this node's pattern string
* @return a FALSE literal when the predicate rejects the pattern; otherwise a new WildcardLike over
*         {@code unwrappedField} that keeps this node's source and pattern
*/
142+
@Override
143+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
144+
// The predicate rejected the pattern's casing: replace the whole match with a FALSE literal.
if (matchesCaseFn.test(pattern().pattern()) == false) {
145+
return Literal.of(this, Boolean.FALSE);
146+
}
147+
// Same source and pattern, but on the unwrapped field and with the trailing flag set to true.
return new WildcardLike(source(), unwrappedField, pattern(), true);
148+
}
135149
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLikeList.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.xpack.esql.core.expression.Expression;
2020
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.Literal;
2122
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
2223
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
2324
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
@@ -31,6 +32,8 @@
3132
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
3233

3334
import java.io.IOException;
35+
import java.util.List;
36+
import java.util.function.Predicate;
3437
import java.util.function.Supplier;
3538
import java.util.stream.Collectors;
3639

@@ -177,4 +180,21 @@ private String getLuceneQueryDescription() {
177180
private Query translateField(String targetFieldName) {
178181
return new ExpressionQuery(source(), targetFieldName, this);
179182
}
183+
184+
/**
185+
* Pushes down string casing optimization by filtering patterns using the provided predicate.
186+
* Returns a new RegexMatch or a Literal.FALSE if none match.
187+
*
* @param unwrappedField the field expression the new WildcardLikeList is built on, in place of this node's field
* @param matchesCaseFn predicate applied to each pattern string in this node's pattern list
* @return a FALSE literal when the predicate rejects every pattern; otherwise a new WildcardLikeList
*         over {@code unwrappedField} holding only the accepted patterns
*/
188+
@Override
189+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
190+
// Keep only the patterns the predicate accepts; the rejected ones are dropped from the list.
List<WildcardPattern> filtered = pattern().patternList()
191+
.stream()
192+
.filter(p -> matchesCaseFn.test(p.pattern()))
193+
.collect(Collectors.toList());
194+
// none of the patterns matches the case of the field, return false
195+
if (filtered.isEmpty()) {
196+
return Literal.of(this, Boolean.FALSE);
197+
}
198+
// Rebuild the list match on the unwrapped field with the surviving patterns only.
// NOTE(review): the trailing `true` is presumably the case-insensitive flag, as in the sibling classes - confirm.
return new WildcardLikeList(source(), unwrappedField, new WildcardPatternList(filtered), true);
199+
}
180200
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStringCasingWithInsensitiveRegexMatch.java

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
99

1010
import org.elasticsearch.xpack.esql.core.expression.Expression;
11-
import org.elasticsearch.xpack.esql.core.expression.Literal;
12-
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePatternList;
1311
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RegexMatch;
1412
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.StringPattern;
15-
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
1613
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ChangeCase;
17-
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.RLike;
18-
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.WildcardLike;
1914
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
2015

16+
import java.util.function.Predicate;
17+
2118
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStringCasingWithInsensitiveEquals.unwrapCase;
2219

2320
public class ReplaceStringCasingWithInsensitiveRegexMatch extends OptimizerRules.OptimizerExpressionRule<
@@ -29,29 +26,12 @@ public ReplaceStringCasingWithInsensitiveRegexMatch() {
2926

3027
@Override
3128
protected Expression rule(RegexMatch<? extends StringPattern> regexMatch, LogicalOptimizerContext unused) {
32-
Expression e = regexMatch;
33-
if (regexMatch.pattern() instanceof WildcardPatternList || regexMatch.pattern() instanceof RLikePatternList) {
34-
// This optimization is not supported for WildcardPatternList and RLikePatternList for now
35-
return e;
36-
}
3729
if (regexMatch.field() instanceof ChangeCase changeCase) {
38-
var pattern = regexMatch.pattern().pattern();
39-
e = changeCase.caseType().matchesCase(pattern) ? insensitiveRegexMatch(regexMatch) : Literal.of(regexMatch, Boolean.FALSE);
30+
Predicate<String> matchesCase = changeCase.caseType()::matchesCase;
31+
Expression unwrappedField = unwrapCase(regexMatch.field());
32+
return regexMatch.optimizeStringCasingWithInsensitiveRegexMatch(unwrappedField, matchesCase);
4033
}
41-
return e;
42-
}
43-
44-
private static Expression insensitiveRegexMatch(RegexMatch<? extends StringPattern> regexMatch) {
45-
return switch (regexMatch) {
46-
case RLike rlike -> new RLike(rlike.source(), unwrapCase(rlike.field()), rlike.pattern(), true);
47-
case WildcardLike wildcardLike -> new WildcardLike(
48-
wildcardLike.source(),
49-
unwrapCase(wildcardLike.field()),
50-
wildcardLike.pattern(),
51-
true
52-
);
53-
default -> regexMatch;
54-
};
34+
return regexMatch;
5535
}
5636

5737
}

0 commit comments

Comments
 (0)