Skip to content

Commit bad1e04

Browse files
[8.19] Add Support for LIKE (LIST) (#129170) (#129554)
* Add Support for LIKE (LIST) (#129170) Adds support for an alternative syntax of the LIKE function that takes a list of patterns. Example: FROM foo | WHERE bar LIKE ("A*","B*", "C?"). The new syntax is documented as part of the existing LIKE function documentation. To improve mixed-cluster compatibility, the existing WildcardLike Java implementation is still used both for the old single-pattern syntax and for lists that contain exactly one pattern.
1 parent fa2b53b commit bad1e04

File tree

28 files changed

+2074
-879
lines changed

28 files changed

+2074
-879
lines changed

docs/changelog/129170.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129170
2+
summary: Add Support for LIKE (LIST)
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

docs/reference/esql/functions/kibana/definition/like.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/esql/functions/kibana/definition/not_like.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/esql/functions/kibana/docs/like.md

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/esql/functions/kibana/docs/not_like.md

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
2+
3+
Matching the exact characters `*` and `?` will require escaping.
4+
The escape character is backslash `\`. Since backslash is also a special character in string literals,
5+
it will require further escaping.
6+
7+
```esql
8+
ROW message = "foo * bar"
9+
| WHERE message LIKE "foo \\* bar"
10+
```
11+
12+
13+
```esql
14+
ROW message = "foobar"
15+
| WHERE message like ("foo*", "bar?")
16+
```
17+
18+
19+
To reduce the overhead of escaping, we suggest using triple-quoted strings (`"""`).
20+
21+
```esql
22+
ROW message = "foo * bar"
23+
| WHERE message LIKE """foo \* bar"""
24+
```
25+
26+

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@
1212
import org.apache.lucene.util.automaton.MinimizationOperations;
1313
import org.apache.lucene.util.automaton.Operations;
1414
import org.apache.lucene.util.automaton.RegExp;
15+
import org.elasticsearch.common.io.stream.StreamInput;
16+
import org.elasticsearch.common.io.stream.StreamOutput;
17+
import org.elasticsearch.common.io.stream.Writeable;
1518
import org.elasticsearch.xpack.esql.core.util.StringUtils;
1619

20+
import java.io.IOException;
1721
import java.util.Objects;
1822

1923
import static org.elasticsearch.xpack.esql.core.util.StringUtils.luceneWildcardToRegExp;
@@ -22,10 +26,10 @@
2226
* Similar to basic regex, supporting '?' wildcard for single character (same as regex ".")
2327
* and '*' wildcard for multiple characters (same as regex ".*")
2428
* <p>
25-
* Allows escaping based on a regular char
29+
* Allows escaping based on a regular char.
2630
*
2731
*/
28-
public class WildcardPattern extends AbstractStringPattern {
32+
public class WildcardPattern extends AbstractStringPattern implements Writeable {
2933

3034
private final String wildcard;
3135
private final String regex;
@@ -36,6 +40,15 @@ public WildcardPattern(String pattern) {
3640
this.regex = StringUtils.wildcardToJavaPattern(pattern, '\\');
3741
}
3842

43+
public WildcardPattern(StreamInput in) throws IOException {
44+
this(in.readString());
45+
}
46+
47+
@Override
48+
public void writeTo(StreamOutput out) throws IOException {
49+
out.writeString(wildcard);
50+
}
51+
3952
public String pattern() {
4053
return wildcard;
4154
}
@@ -90,4 +103,5 @@ public boolean equals(Object obj) {
90103
WildcardPattern other = (WildcardPattern) obj;
91104
return Objects.equals(wildcard, other.wildcard);
92105
}
106+
93107
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;
8+
9+
import org.apache.lucene.util.automaton.Automaton;
10+
import org.apache.lucene.util.automaton.Operations;
11+
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
import java.util.List;
17+
import java.util.Objects;
18+
import java.util.stream.Collectors;
19+
20+
/**
21+
* A list of wildcard patterns. Each pattern is a {@link WildcardPattern} that can be used to match strings and is
22+
* similar to basic regex, supporting '?' wildcard for single character (same as regex ".")
23+
* and '*' wildcard for multiple characters (same as regex ".*")
24+
* <p>
25+
* Allows escaping based on a regular char.
26+
*
27+
*/
28+
public class WildcardPatternList extends AbstractStringPattern implements Writeable {
29+
public static final String NAME = "WildcardPatternList";
30+
private final List<WildcardPattern> patternList;
31+
32+
public WildcardPatternList(List<WildcardPattern> patterns) {
33+
this.patternList = patterns;
34+
}
35+
36+
public WildcardPatternList(StreamInput in) throws IOException {
37+
this(in.readCollectionAsList(WildcardPattern::new));
38+
}
39+
40+
@Override
41+
public void writeTo(StreamOutput out) throws IOException {
42+
out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
43+
}
44+
45+
public List<WildcardPattern> patternList() {
46+
return patternList;
47+
}
48+
49+
/**
50+
* Creates an automaton that matches any of the patterns in the list.
51+
* We create a single determinized automaton that is the union of all individual automata to improve performance.
52+
*/
53+
@Override
54+
public Automaton createAutomaton(boolean ignoreCase) {
55+
List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
56+
Automaton result = Operations.union(automatonList);
57+
return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
58+
}
59+
60+
/**
61+
* Returns a Java regex that matches any of the patterns in the list.
62+
* The patterns are joined with the '|' operator to create a single regex.
63+
*/
64+
@Override
65+
public String asJavaRegex() {
66+
return patternList.stream().map(WildcardPattern::asJavaRegex).collect(Collectors.joining("|"));
67+
}
68+
69+
/**
70+
* Returns the textual form of the pattern list: an empty string for an empty list, the bare pattern for a
71+
* single-element list, and otherwise the patterns double-quoted, separated by ", " and wrapped in parentheses.
72+
*/
73+
@Override
74+
public String pattern() {
75+
if (patternList.isEmpty()) {
76+
return "";
77+
}
78+
if (patternList.size() == 1) {
79+
return patternList.get(0).pattern();
80+
}
81+
return "(\"" + patternList.stream().map(WildcardPattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
82+
}
83+
84+
@Override
85+
public int hashCode() {
86+
return Objects.hash(patternList);
87+
}
88+
89+
@Override
90+
public boolean equals(Object obj) {
91+
if (this == obj) {
92+
return true;
93+
}
94+
95+
if (obj == null || getClass() != obj.getClass()) {
96+
return false;
97+
}
98+
99+
WildcardPatternList other = (WildcardPatternList) obj;
100+
return patternList.equals(other.patternList);
101+
}
102+
103+
}

x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,42 @@ public void testCaseInsensitiveEquality() throws IOException {
239239
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
240240
}
241241

242+
public void testLike() throws IOException {
243+
String value = "v".repeat(between(0, 256));
244+
String esqlQuery = """
245+
FROM test
246+
| WHERE test like "%value*"
247+
""";
248+
String luceneQuery = switch (type) {
249+
case KEYWORD -> "test:%value*";
250+
case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
251+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
252+
};
253+
ComputeSignature dataNodeSignature = switch (type) {
254+
case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
255+
case AUTO, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE;
256+
};
257+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
258+
}
259+
260+
public void testLikeList() throws IOException {
261+
String value = "v".repeat(between(0, 256));
262+
String esqlQuery = """
263+
FROM test
264+
| WHERE test like ("%value*", "abc*")
265+
""";
266+
String luceneQuery = switch (type) {
267+
case KEYWORD, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, AUTO, TEXT_WITH_KEYWORD -> "*:*";
268+
case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
269+
};
270+
ComputeSignature dataNodeSignature = switch (type) {
271+
case CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
272+
case AUTO, KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD ->
273+
ComputeSignature.FILTER_IN_COMPUTE;
274+
};
275+
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
276+
}
277+
242278
enum ComputeSignature {
243279
FILTER_IN_COMPUTE(
244280
matchesList().item("LuceneSourceOperator")

0 commit comments

Comments
 (0)