Skip to content

Commit 3e029a9

Browse files
Add support for RLIKE (LIST) with pushdown (#129929)
Adds support for an alternative RLIKE syntax that accepts a list of patterns. Example:

    ROW message = "foobar" | WHERE message RLIKE ("foo.*", "bar.")

The new syntax is documented as part of the existing RLIKE function documentation. To improve mixed-cluster compatibility, the existing RLike Java implementation is still used for the old single-pattern syntax and for a list that contains only one pattern. The RLikeList is pushed down as a single Automaton to improve performance.

(cherry picked from commit f0c30f2)

# Conflicts:
#   x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
#   x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.interp
#   x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseParser.java
1 parent 9ef53f8 commit 3e029a9

File tree

24 files changed

+1862
-572
lines changed

24 files changed

+1862
-572
lines changed

docs/changelog/129929.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129929
2+
summary: Add support for RLIKE (LIST) with pushdown
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

docs/reference/query-languages/esql/_snippets/operators/detailedDescription/rlike.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,17 @@ ROW message = "foo ( bar"
1717
| WHERE message RLIKE """foo \( bar"""
1818
```
1919

20+
```{applies_to}
21+
stack: ga 9.1
22+
serverless: ga
23+
```
24+
25+
Either a single pattern or a list of patterns is supported. If a list of patterns is provided,
26+
the expression will return true if any of the patterns match.
27+
28+
```esql
29+
ROW message = "foobar"
30+
| WHERE message RLIKE ("foo.*", "bar.")
31+
```
32+
2033

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,30 @@
99
import org.apache.lucene.util.automaton.Automaton;
1010
import org.apache.lucene.util.automaton.Operations;
1111
import org.apache.lucene.util.automaton.RegExp;
12+
import org.elasticsearch.common.io.stream.StreamInput;
13+
import org.elasticsearch.common.io.stream.StreamOutput;
14+
import org.elasticsearch.common.io.stream.Writeable;
1215

16+
import java.io.IOException;
1317
import java.util.Objects;
1418

15-
public class RLikePattern extends AbstractStringPattern {
19+
public class RLikePattern extends AbstractStringPattern implements Writeable {
1620

1721
private final String regexpPattern;
1822

1923
public RLikePattern(String regexpPattern) {
2024
this.regexpPattern = regexpPattern;
2125
}
2226

27+
public RLikePattern(StreamInput in) throws IOException {
28+
this(in.readString());
29+
}
30+
31+
@Override
32+
public void writeTo(StreamOutput out) throws IOException {
33+
out.writeString(regexpPattern);
34+
}
35+
2336
@Override
2437
public Automaton createAutomaton(boolean ignoreCase) {
2538
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;
8+
9+
import org.apache.lucene.util.automaton.Automaton;
10+
import org.apache.lucene.util.automaton.Operations;
11+
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
import java.util.List;
17+
import java.util.Objects;
18+
import java.util.stream.Collectors;
19+
20+
public class RLikePatternList extends AbstractStringPattern implements Writeable {
21+
22+
private final List<RLikePattern> patternList;
23+
24+
public RLikePatternList(List<RLikePattern> patternList) {
25+
this.patternList = patternList;
26+
}
27+
28+
public RLikePatternList(StreamInput in) throws IOException {
29+
this(in.readCollectionAsList(RLikePattern::new));
30+
}
31+
32+
@Override
33+
public void writeTo(StreamOutput out) throws IOException {
34+
out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
35+
}
36+
37+
public List<RLikePattern> patternList() {
38+
return patternList;
39+
}
40+
41+
/**
42+
* Creates an automaton that matches any of the patterns in the list.
43+
* We create a single automaton that is the union of all individual automatons to improve performance
44+
*/
45+
@Override
46+
public Automaton createAutomaton(boolean ignoreCase) {
47+
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
48+
Automaton result = Operations.union(automatonList);
49+
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
50+
}
51+
52+
/**
53+
* Returns a Java regex that matches any of the patterns in the list.
54+
* The patterns are joined with the '|' operator to create a single regex.
55+
*/
56+
@Override
57+
public String asJavaRegex() {
58+
return patternList.stream().map(RLikePattern::asJavaRegex).collect(Collectors.joining("|"));
59+
}
60+
61+
@Override
62+
public int hashCode() {
63+
return Objects.hash(patternList);
64+
}
65+
66+
@Override
67+
public boolean equals(Object obj) {
68+
if (this == obj) {
69+
return true;
70+
}
71+
if (obj == null || getClass() != obj.getClass()) {
72+
return false;
73+
}
74+
RLikePatternList other = (RLikePatternList) obj;
75+
return patternList.equals(other.patternList);
76+
}
77+
78+
/**
79+
* Returns a display string for the patterns in the list.
80+
* An empty list yields "", a single pattern is returned as-is, and multiple patterns are rendered as a parenthesized, comma-separated list of double-quoted patterns, e.g. ("a", "b").
81+
*/
82+
@Override
83+
public String pattern() {
84+
if (patternList.isEmpty()) {
85+
return "";
86+
}
87+
if (patternList.size() == 1) {
88+
return patternList.get(0).pattern();
89+
}
90+
return "(\"" + patternList.stream().map(RLikePattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
91+
}
92+
}

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -164,25 +164,24 @@ private Map<String, Object> runEsql(RestEsqlTestCase.RequestObjectBuilder reques
164164
}
165165
}
166166

167-
private <C, V> void assertResultMapForLike(
167+
private <C, V> void assertResultMapWithCapabilities(
168168
boolean includeCCSMetadata,
169169
Map<String, Object> result,
170170
C columns,
171171
V values,
172172
boolean remoteOnly,
173-
boolean requireLikeListCapability
173+
List<String> fullResultCapabilities
174174
) throws IOException {
175-
List<String> requiredCapabilities = new ArrayList<>(List.of("like_on_index_fields"));
176-
if (requireLikeListCapability) {
177-
requiredCapabilities.add("like_list_on_index_fields");
178-
}
179175
// the feature is completely supported if both local and remote clusters support it
180-
boolean isSupported = capabilitiesSupportedNewAndOld(requiredCapabilities);
181-
176+
// otherwise we skip the data check and only verify that we did not get a partial result
177+
boolean isSupported = capabilitiesSupportedNewAndOld(fullResultCapabilities);
182178
if (isSupported) {
183179
assertResultMap(includeCCSMetadata, result, columns, values, remoteOnly);
184180
} else {
185-
logger.info("--> skipping data check for like index test, cluster does not support like index feature");
181+
logger.info(
182+
"--> skipping data check for a test, cluster does not support all of [{}] capabilities",
183+
String.join(",", fullResultCapabilities)
184+
);
186185
// just verify that we did not get a partial result
187186
var clusters = result.get("_clusters");
188187
var reason = "unexpected partial results" + (clusters != null ? ": _clusters=" + clusters : "");
@@ -427,7 +426,7 @@ public void testLikeIndex() throws Exception {
427426
""", includeCCSMetadata);
428427
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
429428
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
430-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
429+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
431430
}
432431

433432
public void testLikeIndexLegacySettingNoResults() throws Exception {
@@ -449,7 +448,7 @@ public void testLikeIndexLegacySettingNoResults() throws Exception {
449448
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
450449
// we expect empty result, since the setting is false
451450
var values = List.of();
452-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
451+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
453452
}
454453
}
455454

@@ -473,7 +472,7 @@ public void testLikeIndexLegacySettingResults() throws Exception {
473472
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
474473
// we expect results, since the setting is false, but there is : in the LIKE query
475474
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
476-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
475+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
477476
}
478477
}
479478

@@ -487,7 +486,7 @@ public void testNotLikeIndex() throws Exception {
487486
""", includeCCSMetadata);
488487
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
489488
var values = List.of(List.of(localDocs.size(), localIndex));
490-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
489+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
491490
}
492491

493492
public void testLikeListIndex() throws Exception {
@@ -502,7 +501,14 @@ public void testLikeListIndex() throws Exception {
502501
""", includeCCSMetadata);
503502
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
504503
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
505-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
504+
assertResultMapWithCapabilities(
505+
includeCCSMetadata,
506+
result,
507+
columns,
508+
values,
509+
false,
510+
List.of("like_on_index_fields", "like_list_on_index_fields")
511+
);
506512
}
507513

508514
public void testNotLikeListIndex() throws Exception {
@@ -516,7 +522,14 @@ public void testNotLikeListIndex() throws Exception {
516522
""", includeCCSMetadata);
517523
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
518524
var values = List.of(List.of(localDocs.size(), localIndex));
519-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
525+
assertResultMapWithCapabilities(
526+
includeCCSMetadata,
527+
result,
528+
columns,
529+
values,
530+
false,
531+
List.of("like_on_index_fields", "like_list_on_index_fields")
532+
);
520533
}
521534

522535
public void testNotLikeListKeyword() throws Exception {
@@ -540,7 +553,14 @@ public void testNotLikeListKeyword() throws Exception {
540553
if (localCount > 0) {
541554
values.add(List.of(localCount, localIndex));
542555
}
543-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
556+
assertResultMapWithCapabilities(
557+
includeCCSMetadata,
558+
result,
559+
columns,
560+
values,
561+
false,
562+
List.of("like_on_index_fields", "like_list_on_index_fields")
563+
);
544564
}
545565

546566
public void testRLikeIndex() throws Exception {
@@ -553,7 +573,7 @@ public void testRLikeIndex() throws Exception {
553573
""", includeCCSMetadata);
554574
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
555575
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
556-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
576+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
557577
}
558578

559579
public void testNotRLikeIndex() throws Exception {
@@ -566,7 +586,37 @@ public void testNotRLikeIndex() throws Exception {
566586
""", includeCCSMetadata);
567587
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
568588
var values = List.of(List.of(localDocs.size(), localIndex));
569-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
589+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
590+
}
591+
592+
public void testRLikeListIndex() throws Exception {
593+
assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
594+
boolean includeCCSMetadata = includeCCSMetadata();
595+
Map<String, Object> result = run("""
596+
FROM test-local-index,*:test-remote-index METADATA _index
597+
| WHERE _index RLIKE (".*remote.*", ".*not-exist.*")
598+
| STATS c = COUNT(*) BY _index
599+
| SORT _index ASC
600+
""", includeCCSMetadata);
601+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
602+
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
603+
// we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
604+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
605+
}
606+
607+
public void testNotRLikeListIndex() throws Exception {
608+
assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
609+
boolean includeCCSMetadata = includeCCSMetadata();
610+
Map<String, Object> result = run("""
611+
FROM test-local-index,*:test-remote-index METADATA _index
612+
| WHERE _index NOT RLIKE (".*remote.*", ".*not-exist.*")
613+
| STATS c = COUNT(*) BY _index
614+
| SORT _index ASC
615+
""", includeCCSMetadata);
616+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
617+
var values = List.of(List.of(localDocs.size(), localIndex));
618+
// we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
619+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
570620
}
571621

572622
private RestClient remoteClusterClient() throws IOException {

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,42 @@ public void testLikeList() throws IOException {
275275
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
276276
}
277277

278+
public void testRLike() throws IOException {
279+
String value = "v".repeat(between(1, 256));
280+
String esqlQuery = """
281+
FROM test
282+
| WHERE test rlike "%value.*"
283+
""";
284+
String luceneQuery = switch (type) {
285+
case KEYWORD -> "test:/%value.*/";
286+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
287+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
288+
};
289+
ComputeSignature dataNodeSignature = switch (type) {
290+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
291+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
292+
};
293+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
294+
}
295+
296+
public void testRLikeList() throws IOException {
297+
String value = "v".repeat(between(1, 256));
298+
String esqlQuery = """
299+
FROM test
300+
| WHERE test rlike ("%value.*", "abc.*")
301+
""";
302+
String luceneQuery = switch (type) {
303+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
304+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
305+
case KEYWORD -> "test:RLIKE(\"%value.*\", \"abc.*\"), caseInsensitive=false";
306+
};
307+
ComputeSignature dataNodeSignature = switch (type) {
308+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
309+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
310+
};
311+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
312+
}
313+
278314
enum ComputeSignature {
279315
FILTER_IN_COMPUTE(
280316
matchesList().item("LuceneSourceOperator")

0 commit comments

Comments
 (0)