Skip to content

Commit bf677a5

Browse files
Add support for RLIKE (LIST)
1 parent 4830dc8 commit bf677a5

File tree

19 files changed

+1089
-7
lines changed

19 files changed

+1089
-7
lines changed

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,30 @@
99
import org.apache.lucene.util.automaton.Automaton;
1010
import org.apache.lucene.util.automaton.Operations;
1111
import org.apache.lucene.util.automaton.RegExp;
12+
import org.elasticsearch.common.io.stream.StreamInput;
13+
import org.elasticsearch.common.io.stream.StreamOutput;
14+
import org.elasticsearch.common.io.stream.Writeable;
1215

16+
import java.io.IOException;
1317
import java.util.Objects;
1418

15-
public class RLikePattern extends AbstractStringPattern {
19+
public class RLikePattern extends AbstractStringPattern implements Writeable {
1620

1721
private final String regexpPattern;
1822

1923
/**
 * Creates a pattern backed by the given regular expression string.
 *
 * @param regexpPattern the regular expression source; stored as-is, without validation
 */
public RLikePattern(String regexpPattern) {
    this.regexpPattern = regexpPattern;
}
2226

27+
/**
 * Deserializing constructor: reads one string from the stream and uses it as the
 * regular expression, delegating to {@link #RLikePattern(String)}.
 *
 * @param in the stream to read the pattern string from
 * @throws IOException if reading from the stream fails
 */
public RLikePattern(StreamInput in) throws IOException {
    this(in.readString());
}
30+
31+
@Override
32+
public void writeTo(StreamOutput out) throws IOException {
33+
out.writeString(regexpPattern);
34+
}
35+
2336
@Override
2437
public Automaton createAutomaton(boolean ignoreCase) {
2538
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;
8+
9+
import org.apache.lucene.util.automaton.Automaton;
10+
import org.apache.lucene.util.automaton.Operations;
11+
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
import java.util.List;
17+
import java.util.Objects;
18+
import java.util.stream.Collectors;
19+
20+
public class RLikePatternList extends AbstractStringPattern implements Writeable {
21+
22+
private final List<RLikePattern> patternList;
23+
24+
public RLikePatternList(List<RLikePattern> patternList) {
25+
26+
this.patternList = patternList;
27+
}
28+
29+
public RLikePatternList(StreamInput in) throws IOException {
30+
this(in.readCollectionAsList(RLikePattern::new));
31+
}
32+
33+
@Override
34+
public void writeTo(StreamOutput out) throws IOException {
35+
out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
36+
}
37+
38+
public List<RLikePattern> patternList() {
39+
return patternList;
40+
}
41+
42+
/**
43+
* Creates an automaton that matches any of the patterns in the list.
44+
* We create a single automaton that is the union of all individual automatons to improve performance
45+
*/
46+
@Override
47+
public Automaton createAutomaton(boolean ignoreCase) {
48+
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
49+
Automaton result = Operations.union(automatonList);
50+
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
51+
}
52+
53+
/**
54+
* Returns a Java regex that matches any of the patterns in the list.
55+
* The patterns are joined with the '|' operator to create a single regex.
56+
*/
57+
@Override
58+
public String asJavaRegex() {
59+
return patternList.stream().map(RLikePattern::asJavaRegex).collect(Collectors.joining("|"));
60+
}
61+
62+
@Override
63+
public int hashCode() {
64+
return Objects.hash(patternList);
65+
}
66+
67+
@Override
68+
public boolean equals(Object obj) {
69+
if (this == obj) {
70+
return true;
71+
}
72+
73+
if (obj == null || getClass() != obj.getClass()) {
74+
return false;
75+
}
76+
77+
RLikePatternList other = (RLikePatternList) obj;
78+
return patternList.equals(other.patternList);
79+
}
80+
81+
/**
82+
* Returns a string that matches any of the patterns in the list.
83+
* The patterns are joined with the '|' operator to create a single regex string.
84+
*/
85+
@Override
86+
public String pattern() {
87+
if (patternList.isEmpty()) {
88+
return "";
89+
}
90+
if (patternList.size() == 1) {
91+
return patternList.get(0).pattern();
92+
}
93+
return "(\"" + patternList.stream().map(RLikePattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
94+
}
95+
}

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,42 @@ public void testLikeList() throws IOException {
275275
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
276276
}
277277

278+
public void testRLike() throws IOException {
279+
String value = "v".repeat(between(1, 256));
280+
String esqlQuery = """
281+
FROM test
282+
| WHERE test rlike "%value.*"
283+
""";
284+
String luceneQuery = switch (type) {
285+
case KEYWORD -> "test:/%value.*/";
286+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
287+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
288+
};
289+
ComputeSignature dataNodeSignature = switch (type) {
290+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
291+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
292+
};
293+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
294+
}
295+
296+
public void testRLikeList() throws IOException {
297+
String value = "v".repeat(between(1, 256));
298+
String esqlQuery = """
299+
FROM test
300+
| WHERE test rlike ("%value.*", "abc.*")
301+
""";
302+
String luceneQuery = switch (type) {
303+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
304+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
305+
case KEYWORD -> "test:AutomatonQuery";
306+
};
307+
ComputeSignature dataNodeSignature = switch (type) {
308+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
309+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
310+
};
311+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
312+
}
313+
278314
enum ComputeSignature {
279315
FILTER_IN_COMPUTE(
280316
matchesList().item("LuceneSourceOperator")

0 commit comments

Comments
 (0)