Skip to content

Commit f0c30f2

Browse files
Add support for RLIKE (LIST) with pushdown (#129929)
Adds support for an alternative RLIKE syntax that takes a list of patterns. Example: ROW message = "foobar" | WHERE message RLIKE ("foo.*", "bar."). The new syntax is documented as part of the existing RLIKE function documentation. To improve mixed-cluster compatibility, the existing RLike Java implementation is still used for the old single-pattern syntax and for a list that contains exactly one pattern. An RLikeList is pushed down as a single Automaton to improve performance.
1 parent 03975a3 commit f0c30f2

File tree

24 files changed

+1759
-469
lines changed

24 files changed

+1759
-469
lines changed

docs/changelog/129929.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129929
2+
summary: Add support for RLIKE (LIST) with pushdown
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

docs/reference/query-languages/esql/_snippets/operators/detailedDescription/rlike.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,17 @@ ROW message = "foo ( bar"
1717
| WHERE message RLIKE """foo \( bar"""
1818
```
1919

20+
```{applies_to}
21+
stack: ga 9.1
22+
serverless: ga
23+
```
24+
25+
Either a single pattern or a list of patterns is supported. If a list of patterns is provided,
26+
the expression will return true if any of the patterns match.
27+
28+
```esql
29+
ROW message = "foobar"
30+
| WHERE message RLIKE ("foo.*", "bar.")
31+
```
32+
2033

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,30 @@
99
import org.apache.lucene.util.automaton.Automaton;
1010
import org.apache.lucene.util.automaton.Operations;
1111
import org.apache.lucene.util.automaton.RegExp;
12+
import org.elasticsearch.common.io.stream.StreamInput;
13+
import org.elasticsearch.common.io.stream.StreamOutput;
14+
import org.elasticsearch.common.io.stream.Writeable;
1215

16+
import java.io.IOException;
1317
import java.util.Objects;
1418

15-
public class RLikePattern extends AbstractStringPattern {
19+
public class RLikePattern extends AbstractStringPattern implements Writeable {
1620

1721
private final String regexpPattern;
1822

1923
public RLikePattern(String regexpPattern) {
2024
this.regexpPattern = regexpPattern;
2125
}
2226

27+
public RLikePattern(StreamInput in) throws IOException {
28+
this(in.readString());
29+
}
30+
31+
@Override
32+
public void writeTo(StreamOutput out) throws IOException {
33+
out.writeString(regexpPattern);
34+
}
35+
2336
@Override
2437
public Automaton createAutomaton(boolean ignoreCase) {
2538
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;
8+
9+
import org.apache.lucene.util.automaton.Automaton;
10+
import org.apache.lucene.util.automaton.Operations;
11+
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
import java.util.List;
17+
import java.util.Objects;
18+
import java.util.stream.Collectors;
19+
20+
/**
 * A string pattern backed by a list of {@link RLikePattern}s. The automaton it builds accepts
 * input accepted by any one of the member patterns. Serialized as the collection of its members.
 */
public class RLikePatternList extends AbstractStringPattern implements Writeable {
21+
22+
// Held exactly as passed to the constructor; no defensive copy is made.
private final List<RLikePattern> patternList;
23+
24+
/**
 * Wraps the given patterns.
 *
 * @param patternList the member patterns; the reference is stored as-is
 */
public RLikePatternList(List<RLikePattern> patternList) {
25+
this.patternList = patternList;
26+
}
27+
28+
/**
 * Deserializing constructor: reads a collection of {@link RLikePattern}s from the stream,
 * mirroring {@link #writeTo(StreamOutput)}.
 */
public RLikePatternList(StreamInput in) throws IOException {
29+
this(in.readCollectionAsList(RLikePattern::new));
30+
}
31+
32+
/** Writes the member patterns as a collection, each one via its own {@code writeTo}. */
@Override
33+
public void writeTo(StreamOutput out) throws IOException {
34+
out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
35+
}
36+
37+
/** Returns the member patterns (the same list instance that is held internally). */
public List<RLikePattern> patternList() {
38+
return patternList;
39+
}
40+
41+
/**
42+
* Creates an automaton that matches any of the patterns in the list.
43+
* Each pattern is compiled with the same {@code ignoreCase} flag, the resulting automatons are
* unioned into one, and the union is determinized using
* {@code Operations.DEFAULT_DETERMINIZE_WORK_LIMIT}. A single automaton is built to improve performance.
44+
*/
45+
@Override
46+
public Automaton createAutomaton(boolean ignoreCase) {
47+
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
48+
Automaton result = Operations.union(automatonList);
49+
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
50+
}
51+
52+
/**
53+
* Returns a Java regex that matches any of the patterns in the list.
54+
* The patterns are joined with the '|' operator to create a single regex.
* NOTE(review): the alternation is not wrapped in a group, so a caller that concatenates
* anything onto the result would need to add its own grouping - confirm against callers.
55+
*/
56+
@Override
57+
public String asJavaRegex() {
58+
return patternList.stream().map(RLikePattern::asJavaRegex).collect(Collectors.joining("|"));
59+
}
60+
61+
@Override
62+
public int hashCode() {
63+
return Objects.hash(patternList);
64+
}
65+
66+
// Equality requires the exact same class and equal pattern lists.
@Override
67+
public boolean equals(Object obj) {
68+
if (this == obj) {
69+
return true;
70+
}
71+
if (obj == null || getClass() != obj.getClass()) {
72+
return false;
73+
}
74+
RLikePatternList other = (RLikePatternList) obj;
75+
return patternList.equals(other.patternList);
76+
}
77+
78+
/**
79+
* Returns a textual form of the pattern list; for more than one pattern this is not a regex.
80+
* An empty list yields the empty string and a single pattern is returned unchanged. Two or more
* patterns are rendered as a parenthesized, comma-separated list of double-quoted patterns,
* for example {@code ("foo.*", "bar.")}.
81+
*/
82+
@Override
83+
public String pattern() {
84+
if (patternList.isEmpty()) {
85+
return "";
86+
}
87+
if (patternList.size() == 1) {
88+
return patternList.get(0).pattern();
89+
}
90+
return "(\"" + patternList.stream().map(RLikePattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
91+
}
92+
}

x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -205,25 +205,24 @@ private Map<String, Object> runEsql(RestEsqlTestCase.RequestObjectBuilder reques
205205
}
206206
}
207207

208-
private <C, V> void assertResultMapForLike(
208+
private <C, V> void assertResultMapWithCapabilities(
209209
boolean includeCCSMetadata,
210210
Map<String, Object> result,
211211
C columns,
212212
V values,
213213
boolean remoteOnly,
214-
boolean requireLikeListCapability
214+
List<String> fullResultCapabilities
215215
) throws IOException {
216-
List<String> requiredCapabilities = new ArrayList<>(List.of("like_on_index_fields"));
217-
if (requireLikeListCapability) {
218-
requiredCapabilities.add("like_list_on_index_fields");
219-
}
220216
// the feature is completely supported if both local and remote clusters support it
221-
boolean isSupported = capabilitiesSupportedNewAndOld(requiredCapabilities);
222-
217+
// otherwise we expect a partial result, and will not check the data
218+
boolean isSupported = capabilitiesSupportedNewAndOld(fullResultCapabilities);
223219
if (isSupported) {
224220
assertResultMap(includeCCSMetadata, result, columns, values, remoteOnly);
225221
} else {
226-
logger.info("--> skipping data check for like index test, cluster does not support like index feature");
222+
logger.info(
223+
"--> skipping data check for a test, cluster does not support all of [{}] capabilities",
224+
String.join(",", fullResultCapabilities)
225+
);
227226
// just verify that we did not get a partial result
228227
var clusters = result.get("_clusters");
229228
var reason = "unexpected partial results" + (clusters != null ? ": _clusters=" + clusters : "");
@@ -526,7 +525,7 @@ public void testLikeIndex() throws Exception {
526525
""", includeCCSMetadata);
527526
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
528527
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
529-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
528+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
530529
}
531530

532531
public void testLikeIndexLegacySettingNoResults() throws Exception {
@@ -548,7 +547,7 @@ public void testLikeIndexLegacySettingNoResults() throws Exception {
548547
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
549548
// we expect empty result, since the setting is false
550549
var values = List.of();
551-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
550+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
552551
}
553552
}
554553

@@ -572,7 +571,7 @@ public void testLikeIndexLegacySettingResults() throws Exception {
572571
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
573572
// we expect results, since the setting is false, but there is : in the LIKE query
574573
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
575-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
574+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
576575
}
577576
}
578577

@@ -586,7 +585,7 @@ public void testNotLikeIndex() throws Exception {
586585
""", includeCCSMetadata);
587586
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
588587
var values = List.of(List.of(localDocs.size(), localIndex));
589-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
588+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
590589
}
591590

592591
public void testLikeListIndex() throws Exception {
@@ -601,7 +600,14 @@ public void testLikeListIndex() throws Exception {
601600
""", includeCCSMetadata);
602601
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
603602
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
604-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
603+
assertResultMapWithCapabilities(
604+
includeCCSMetadata,
605+
result,
606+
columns,
607+
values,
608+
false,
609+
List.of("like_on_index_fields", "like_list_on_index_fields")
610+
);
605611
}
606612

607613
public void testNotLikeListIndex() throws Exception {
@@ -615,7 +621,14 @@ public void testNotLikeListIndex() throws Exception {
615621
""", includeCCSMetadata);
616622
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
617623
var values = List.of(List.of(localDocs.size(), localIndex));
618-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
624+
assertResultMapWithCapabilities(
625+
includeCCSMetadata,
626+
result,
627+
columns,
628+
values,
629+
false,
630+
List.of("like_on_index_fields", "like_list_on_index_fields")
631+
);
619632
}
620633

621634
public void testNotLikeListKeyword() throws Exception {
@@ -639,7 +652,14 @@ public void testNotLikeListKeyword() throws Exception {
639652
if (localCount > 0) {
640653
values.add(List.of(localCount, localIndex));
641654
}
642-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
655+
assertResultMapWithCapabilities(
656+
includeCCSMetadata,
657+
result,
658+
columns,
659+
values,
660+
false,
661+
List.of("like_on_index_fields", "like_list_on_index_fields")
662+
);
643663
}
644664

645665
public void testRLikeIndex() throws Exception {
@@ -652,7 +672,7 @@ public void testRLikeIndex() throws Exception {
652672
""", includeCCSMetadata);
653673
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
654674
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
655-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
675+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
656676
}
657677

658678
public void testNotRLikeIndex() throws Exception {
@@ -665,7 +685,37 @@ public void testNotRLikeIndex() throws Exception {
665685
""", includeCCSMetadata);
666686
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
667687
var values = List.of(List.of(localDocs.size(), localIndex));
668-
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
688+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
689+
}
690+
691+
/**
 * Filters on {@code _index} with an RLIKE pattern list across the local and remote test indices
 * and expects a single row: the remote index with a count equal to the number of remote docs.
 * Skipped unless the {@code rlike_with_list_of_patterns} capability check passes.
 */
public void testRLikeListIndex() throws Exception {
692+
assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
693+
boolean includeCCSMetadata = includeCCSMetadata();
694+
Map<String, Object> result = run("""
695+
FROM test-local-index,*:test-remote-index METADATA _index
696+
| WHERE _index RLIKE (".*remote.*", ".*not-exist.*")
697+
| STATS c = COUNT(*) BY _index
698+
| SORT _index ASC
699+
""", includeCCSMetadata);
700+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
701+
// Only the remote index is expected in the result, under its cluster-qualified name.
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
702+
// we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
703+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
704+
}
705+
706+
/**
 * Negated counterpart of the RLIKE-list index test: filters on {@code _index} with
 * NOT RLIKE over a pattern list and expects a single row for the local index with a count
 * equal to the number of local docs.
 * Skipped unless the {@code rlike_with_list_of_patterns} capability check passes.
 */
public void testNotRLikeListIndex() throws Exception {
707+
assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
708+
boolean includeCCSMetadata = includeCCSMetadata();
709+
Map<String, Object> result = run("""
710+
FROM test-local-index,*:test-remote-index METADATA _index
711+
| WHERE _index NOT RLIKE (".*remote.*", ".*not-exist.*")
712+
| STATS c = COUNT(*) BY _index
713+
| SORT _index ASC
714+
""", includeCCSMetadata);
715+
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
716+
// Only the local index is expected in the result.
var values = List.of(List.of(localDocs.size(), localIndex));
717+
// we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
718+
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
669719
}
670720

671721
private RestClient remoteClusterClient() throws IOException {

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,42 @@ public void testLikeList() throws IOException {
275275
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
276276
}
277277

278+
/**
 * Checks how a single-pattern {@code rlike} filter is pushed down for the current field
 * {@code type}: which Lucene query string is expected and whether the filter is expected in the
 * query or in compute.
 */
public void testRLike() throws IOException {
279+
// Random-length run of 'v' characters used as the test value.
String value = "v".repeat(between(1, 256));
280+
// NOTE(review): "%value" looks like a placeholder substituted by testPushQuery - confirm.
String esqlQuery = """
281+
FROM test
282+
| WHERE test rlike "%value.*"
283+
""";
284+
// Expected Lucene query string per field type.
String luceneQuery = switch (type) {
285+
case KEYWORD -> "test:/%value.*/";
286+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
287+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
288+
};
289+
// Expected data-node signature per field type.
ComputeSignature dataNodeSignature = switch (type) {
290+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
291+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
292+
};
293+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
294+
}
295+
296+
/**
 * Checks how an {@code rlike} filter with a list of two patterns is pushed down for the current
 * field {@code type}: which Lucene query string is expected and whether the filter is expected
 * in the query or in compute.
 */
public void testRLikeList() throws IOException {
297+
// Random-length run of 'v' characters used as the test value.
String value = "v".repeat(between(1, 256));
298+
// NOTE(review): "%value" looks like a placeholder substituted by testPushQuery - confirm.
String esqlQuery = """
299+
FROM test
300+
| WHERE test rlike ("%value.*", "abc.*")
301+
""";
302+
// Expected Lucene query string per field type; for KEYWORD the list is shown in its
// parenthesized, quoted form together with the case-sensitivity flag.
String luceneQuery = switch (type) {
303+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
304+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
305+
case KEYWORD -> "test:RLIKE(\"%value.*\", \"abc.*\"), caseInsensitive=false";
306+
};
307+
// Expected data-node signature per field type.
ComputeSignature dataNodeSignature = switch (type) {
308+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
309+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
310+
};
311+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
312+
}
313+
278314
enum ComputeSignature {
279315
FILTER_IN_COMPUTE(
280316
matchesList().item("LuceneSourceOperator")

0 commit comments

Comments
 (0)