Skip to content

Commit dbcce0d

Browse files
Integrate LIKE/RLIKE LIST with ReplaceStringCasingWithInsensitiveRegexMatch rule
1 parent d365412 commit dbcce0d

File tree

7 files changed

+133
-26
lines changed

7 files changed

+133
-26
lines changed

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RegexMatch.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.xpack.esql.core.type.DataType;
1616

1717
import java.util.Objects;
18+
import java.util.function.Predicate;
1819

1920
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
2021
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isStringAndExact;
@@ -67,6 +68,14 @@ public Boolean fold(FoldContext ctx) {
6768
throw new UnsupportedOperationException();
6869
}
6970

71+
/**
72+
* Returns an equivalent optimized expression taking into account the case of the pattern(s)
73+
* @param unwrappedField the field with to_upper/to_lower function removed
74+
* @param matchesCaseFn a predicate to check if a pattern matches the case
75+
* @return an optimized equivalent Expression or this if no optimization is possible
76+
*/
77+
public abstract Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn);
78+
7079
@Override
7180
public boolean equals(Object obj) {
7281
if (super.equals(obj)) {

x-pack/plugin/esql/qa/testFixtures/src/main/resources/where-like.csv-spec

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,60 @@ emp_no:integer | first_name:keyword
534534
10055 | Georgy
535535
;
536536

537+
likeListWithUpperAllLower
538+
required_capability: like_with_list_of_patterns
539+
FROM employees
540+
| WHERE TO_UPPER(first_name) LIKE ("geor*", "wei*")
541+
| KEEP emp_no, first_name
542+
| SORT emp_no;
543+
544+
emp_no:integer | first_name:keyword
545+
;
546+
547+
likeListWithUpperAllUpper
548+
required_capability: like_with_list_of_patterns
549+
FROM employees
550+
| WHERE TO_UPPER(first_name) LIKE ("GEOR*", "WEI*")
551+
| KEEP emp_no, first_name
552+
| SORT emp_no;
553+
554+
emp_no:integer | first_name:keyword
555+
10001 | Georgi
556+
10040 | Weiyi
557+
10055 | Georgy
558+
;
559+
560+
likeListWithUpperMixedCase
561+
required_capability: like_with_list_of_patterns
562+
FROM employees
563+
| WHERE TO_UPPER(first_name) LIKE ("GeOr*", "wEiY*", "bErNi")
564+
| KEEP emp_no, first_name
565+
| SORT emp_no;
566+
567+
emp_no:integer | first_name:keyword
568+
;
569+
570+
likeListWithUpperMultiplePatternsMixedCase
571+
required_capability: like_with_list_of_patterns
572+
FROM employees
573+
| WHERE TO_UPPER(first_name) LIKE ("geor*", "WEIYI*", "bErnI*")
574+
| KEEP emp_no, first_name
575+
| SORT emp_no;
576+
577+
emp_no:integer | first_name:keyword
578+
10040 | Weiyi
579+
;
580+
581+
likeListWithUpperNoMatch
582+
required_capability: like_with_list_of_patterns
583+
FROM employees
584+
| WHERE TO_UPPER(first_name) LIKE ("notaname*")
585+
| KEEP emp_no, first_name
586+
| SORT emp_no;
587+
588+
emp_no:integer | first_name:keyword
589+
;
590+
537591
rlikeListEmptyArgWildcard
538592
required_capability: rlike_with_list_of_patterns
539593
FROM employees

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/RLike.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.xpack.esql.core.expression.Expression;
14+
import org.elasticsearch.xpack.esql.core.expression.Literal;
1415
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePattern;
1516
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
1617
import org.elasticsearch.xpack.esql.core.querydsl.query.RegexQuery;
@@ -24,6 +25,7 @@
2425
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2526

2627
import java.io.IOException;
28+
import java.util.function.Predicate;
2729

2830
public class RLike extends RegexMatch<RLikePattern> {
2931
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "RLike", RLike::new);
@@ -108,4 +110,15 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
108110
// TODO: see whether escaping is needed
109111
return new RegexQuery(source(), handler.nameOf(fa.exactAttribute()), pattern().asJavaRegex(), caseInsensitive());
110112
}
113+
114+
/**
115+
* Pushes down string casing optimization for a single pattern using the provided predicate.
116+
* Returns a new RLike with case insensitivity or a Literal.FALSE if not matched.
117+
*/
118+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
119+
if (matchesCaseFn.test(pattern().pattern()) == false) {
120+
return Literal.of(this, Boolean.FALSE);
121+
}
122+
return new RLike(source(), unwrappedField, pattern(), true);
123+
}
111124
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/RLikeList.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.index.query.SearchExecutionContext;
1818
import org.elasticsearch.xpack.esql.core.expression.Expression;
1919
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
20+
import org.elasticsearch.xpack.esql.core.expression.Literal;
2021
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePattern;
2122
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePatternList;
2223
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
@@ -29,6 +30,8 @@
2930
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
3031

3132
import java.io.IOException;
33+
import java.util.List;
34+
import java.util.function.Predicate;
3235
import java.util.function.Supplier;
3336
import java.util.stream.Collectors;
3437

@@ -145,6 +148,22 @@ public org.apache.lucene.search.Query asLuceneQuery(
145148
);
146149
}
147150

151+
/**
152+
* Pushes down string casing optimization by filtering patterns using the provided predicate.
153+
* Returns a new RegexMatch or a Literal.FALSE if none match.
154+
*/
155+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
156+
List<RLikePattern> filtered = pattern().patternList()
157+
.stream()
158+
.filter(p -> matchesCaseFn.test(p.pattern()))
159+
.collect(Collectors.toList());
160+
// none of the patterns matches the case of the field, return false
161+
if (filtered.isEmpty()) {
162+
return Literal.of(this, Boolean.FALSE);
163+
}
164+
return new RLikeList(source(), unwrappedField, new RLikePatternList(filtered), true);
165+
}
166+
148167
@Override
149168
protected NodeInfo<? extends Expression> info() {
150169
return NodeInfo.create(this, RLikeList::new, field(), pattern(), caseInsensitive());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLike.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.xpack.esql.core.expression.Expression;
1414
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
15+
import org.elasticsearch.xpack.esql.core.expression.Literal;
1516
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
1617
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
1718
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
@@ -25,6 +26,7 @@
2526
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2627

2728
import java.io.IOException;
29+
import java.util.function.Predicate;
2830

2931
public class WildcardLike extends RegexMatch<WildcardPattern> {
3032
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
@@ -132,4 +134,15 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
132134
private Query translateField(String targetFieldName, boolean forceStringMatch) {
133135
return new WildcardQuery(source(), targetFieldName, pattern().asLuceneWildcard(), caseInsensitive(), forceStringMatch);
134136
}
137+
138+
/**
139+
* Pushes down string casing optimization for a single pattern using the provided predicate.
140+
* Returns a new WildcardLike with case insensitivity or a Literal.FALSE if not matched.
141+
*/
142+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
143+
if (matchesCaseFn.test(pattern().pattern()) == false) {
144+
return Literal.of(this, Boolean.FALSE);
145+
}
146+
return new WildcardLike(source(), unwrappedField, pattern(), true);
147+
}
135148
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/regex/WildcardLikeList.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.index.query.SearchExecutionContext;
1919
import org.elasticsearch.xpack.esql.core.expression.Expression;
2020
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.Literal;
2122
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern;
2223
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
2324
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
@@ -31,6 +32,8 @@
3132
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
3233

3334
import java.io.IOException;
35+
import java.util.List;
36+
import java.util.function.Predicate;
3437
import java.util.function.Supplier;
3538
import java.util.stream.Collectors;
3639

@@ -177,4 +180,20 @@ private String getLuceneQueryDescription() {
177180
private Query translateField(String targetFieldName) {
178181
return new ExpressionQuery(source(), targetFieldName, this);
179182
}
183+
184+
/**
185+
* Pushes down string casing optimization by filtering patterns using the provided predicate.
186+
* Returns a new RegexMatch or a Literal.FALSE if none match.
187+
*/
188+
public Expression optimizeStringCasingWithInsensitiveRegexMatch(Expression unwrappedField, Predicate<String> matchesCaseFn) {
189+
List<WildcardPattern> filtered = pattern().patternList()
190+
.stream()
191+
.filter(p -> matchesCaseFn.test(p.pattern()))
192+
.collect(Collectors.toList());
193+
// none of the patterns matches the case of the field, return false
194+
if (filtered.isEmpty()) {
195+
return Literal.of(this, Boolean.FALSE);
196+
}
197+
return new WildcardLikeList(source(), unwrappedField, new WildcardPatternList(filtered), true);
198+
}
180199
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStringCasingWithInsensitiveRegexMatch.java

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
99

1010
import org.elasticsearch.xpack.esql.core.expression.Expression;
11-
import org.elasticsearch.xpack.esql.core.expression.Literal;
12-
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePatternList;
1311
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RegexMatch;
1412
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.StringPattern;
15-
import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPatternList;
1613
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ChangeCase;
17-
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.RLike;
18-
import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.WildcardLike;
1914
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
2015

16+
import java.util.function.Predicate;
17+
2118
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStringCasingWithInsensitiveEquals.unwrapCase;
2219

2320
public class ReplaceStringCasingWithInsensitiveRegexMatch extends OptimizerRules.OptimizerExpressionRule<
@@ -29,29 +26,12 @@ public ReplaceStringCasingWithInsensitiveRegexMatch() {
2926

3027
@Override
3128
protected Expression rule(RegexMatch<? extends StringPattern> regexMatch, LogicalOptimizerContext unused) {
32-
Expression e = regexMatch;
33-
if (regexMatch.pattern() instanceof WildcardPatternList || regexMatch.pattern() instanceof RLikePatternList) {
34-
// This optimization is not supported for WildcardPatternList and RLikePatternList for now
35-
return e;
36-
}
3729
if (regexMatch.field() instanceof ChangeCase changeCase) {
38-
var pattern = regexMatch.pattern().pattern();
39-
e = changeCase.caseType().matchesCase(pattern) ? insensitiveRegexMatch(regexMatch) : Literal.of(regexMatch, Boolean.FALSE);
30+
Predicate<String> matchesCase = changeCase.caseType()::matchesCase;
31+
Expression unwrappedField = unwrapCase(regexMatch.field());
32+
return regexMatch.optimizeStringCasingWithInsensitiveRegexMatch(unwrappedField, matchesCase);
4033
}
41-
return e;
42-
}
43-
44-
private static Expression insensitiveRegexMatch(RegexMatch<? extends StringPattern> regexMatch) {
45-
return switch (regexMatch) {
46-
case RLike rlike -> new RLike(rlike.source(), unwrapCase(rlike.field()), rlike.pattern(), true);
47-
case WildcardLike wildcardLike -> new WildcardLike(
48-
wildcardLike.source(),
49-
unwrapCase(wildcardLike.field()),
50-
wildcardLike.pattern(),
51-
true
52-
);
53-
default -> regexMatch;
54-
};
34+
return regexMatch;
5535
}
5636

5737
}

0 commit comments

Comments
 (0)