Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/129929.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129929
summary: Add support for RLIKE (LIST) with pushdown
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,17 @@ ROW message = "foo ( bar"
| WHERE message RLIKE """foo \( bar"""
```

```{applies_to}
stack: ga 9.1
serverless: ga
```

Either a single pattern or a list of patterns can be provided. If a list of patterns is provided,
the expression returns true if any of the patterns match.

```esql
ROW message = "foobar"
| WHERE message RLIKE ("foo.*", "bar.")
```


Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,30 @@
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;
import java.util.Objects;

public class RLikePattern extends AbstractStringPattern {
public class RLikePattern extends AbstractStringPattern implements Writeable {

private final String regexpPattern;

public RLikePattern(String regexpPattern) {
this.regexpPattern = regexpPattern;
}

public RLikePattern(StreamInput in) throws IOException {
this(in.readString());
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(regexpPattern);
}

@Override
public Automaton createAutomaton(boolean ignoreCase) {
int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.core.expression.predicate.regex;

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * A list of {@link RLikePattern}s treated as a single pattern: the automaton built by
 * {@link #createAutomaton(boolean)} is the union of the automatons of the individual patterns.
 * The list is copied on construction, so instances do not change after they are created.
 */
public class RLikePatternList extends AbstractStringPattern implements Writeable {

    private final List<RLikePattern> patternList;

    /**
     * Creates a pattern list from the given patterns.
     *
     * @param patternList the patterns to combine; a defensive, unmodifiable copy is stored
     * @throws NullPointerException if the list or any of its elements is {@code null}
     */
    public RLikePatternList(List<RLikePattern> patternList) {
        // Copy so that later changes to the caller's list cannot alter equals/hashCode or the serialized form.
        this.patternList = List.copyOf(patternList);
    }

    /**
     * Reads a pattern list from the stream, in the format produced by {@link #writeTo(StreamOutput)}.
     */
    public RLikePatternList(StreamInput in) throws IOException {
        this(in.readCollectionAsList(RLikePattern::new));
    }

    /**
     * Writes the patterns as a collection, each element serialized by {@link RLikePattern#writeTo(StreamOutput)}.
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeCollection(patternList, (o, pattern) -> pattern.writeTo(o));
    }

    /**
     * Returns the patterns of this list as an unmodifiable list.
     */
    public List<RLikePattern> patternList() {
        return patternList;
    }

    /**
     * Creates an automaton that matches any of the patterns in the list.
     * We create a single automaton that is the union of all individual automatons to improve performance
     */
    @Override
    public Automaton createAutomaton(boolean ignoreCase) {
        List<Automaton> automatonList = patternList.stream().map(x -> x.createAutomaton(ignoreCase)).toList();
        Automaton result = Operations.union(automatonList);
        return Operations.determinize(result, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
    }

    /**
     * Returns a Java regex that matches any of the patterns in the list.
     * The patterns are joined with the '|' operator to create a single regex.
     */
    @Override
    public String asJavaRegex() {
        return patternList.stream().map(RLikePattern::asJavaRegex).collect(Collectors.joining("|"));
    }

    @Override
    public int hashCode() {
        return Objects.hash(patternList);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        RLikePatternList other = (RLikePatternList) obj;
        return patternList.equals(other.patternList);
    }

    /**
     * Returns a textual rendering of the pattern list. This is not a regex.
     * <ul>
     * <li>an empty list renders as the empty string;</li>
     * <li>a single pattern renders as that pattern's own {@link RLikePattern#pattern()};</li>
     * <li>several patterns render as a parenthesized, comma-separated list of double-quoted patterns,
     * for example {@code ("foo.+", "bar.")}.</li>
     * </ul>
     */
    @Override
    public String pattern() {
        if (patternList.isEmpty()) {
            return "";
        }
        if (patternList.size() == 1) {
            return patternList.get(0).pattern();
        }
        return "(\"" + patternList.stream().map(RLikePattern::pattern).collect(Collectors.joining("\", \"")) + "\")";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,25 +205,24 @@ private Map<String, Object> runEsql(RestEsqlTestCase.RequestObjectBuilder reques
}
}

private <C, V> void assertResultMapForLike(
private <C, V> void assertResultMapWithCapabilities(
boolean includeCCSMetadata,
Map<String, Object> result,
C columns,
V values,
boolean remoteOnly,
boolean requireLikeListCapability
List<String> fullResultCapabilities
) throws IOException {
List<String> requiredCapabilities = new ArrayList<>(List.of("like_on_index_fields"));
if (requireLikeListCapability) {
requiredCapabilities.add("like_list_on_index_fields");
}
// the feature is completely supported if both local and remote clusters support it
boolean isSupported = capabilitiesSupportedNewAndOld(requiredCapabilities);

// otherwise we expect a partial result, and will not check the data
boolean isSupported = capabilitiesSupportedNewAndOld(fullResultCapabilities);
if (isSupported) {
assertResultMap(includeCCSMetadata, result, columns, values, remoteOnly);
} else {
logger.info("--> skipping data check for like index test, cluster does not support like index feature");
logger.info(
"--> skipping data check for a test, cluster does not support all of [{}] capabilities",
String.join(",", fullResultCapabilities)
);
// just verify that we did not get a partial result
var clusters = result.get("_clusters");
var reason = "unexpected partial results" + (clusters != null ? ": _clusters=" + clusters : "");
Expand Down Expand Up @@ -526,7 +525,7 @@ public void testLikeIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}

public void testLikeIndexLegacySettingNoResults() throws Exception {
Expand All @@ -548,7 +547,7 @@ public void testLikeIndexLegacySettingNoResults() throws Exception {
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
// we expect empty result, since the setting is false
var values = List.of();
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}
}

Expand All @@ -572,7 +571,7 @@ public void testLikeIndexLegacySettingResults() throws Exception {
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
// we expect results, since the setting is false, but there is : in the LIKE query
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}
}

Expand All @@ -586,7 +585,7 @@ public void testNotLikeIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(localDocs.size(), localIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}

public void testLikeListIndex() throws Exception {
Expand All @@ -601,7 +600,14 @@ public void testLikeListIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
assertResultMapWithCapabilities(
includeCCSMetadata,
result,
columns,
values,
false,
List.of("like_on_index_fields", "like_list_on_index_fields")
);
}

public void testNotLikeListIndex() throws Exception {
Expand All @@ -615,7 +621,14 @@ public void testNotLikeListIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(localDocs.size(), localIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
assertResultMapWithCapabilities(
includeCCSMetadata,
result,
columns,
values,
false,
List.of("like_on_index_fields", "like_list_on_index_fields")
);
}

public void testNotLikeListKeyword() throws Exception {
Expand All @@ -639,7 +652,14 @@ public void testNotLikeListKeyword() throws Exception {
if (localCount > 0) {
values.add(List.of(localCount, localIndex));
}
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, true);
assertResultMapWithCapabilities(
includeCCSMetadata,
result,
columns,
values,
false,
List.of("like_on_index_fields", "like_list_on_index_fields")
);
}

public void testRLikeIndex() throws Exception {
Expand All @@ -652,7 +672,7 @@ public void testRLikeIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}

public void testNotRLikeIndex() throws Exception {
Expand All @@ -665,7 +685,37 @@ public void testNotRLikeIndex() throws Exception {
""", includeCCSMetadata);
var columns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
var values = List.of(List.of(localDocs.size(), localIndex));
assertResultMapForLike(includeCCSMetadata, result, columns, values, false, false);
assertResultMapWithCapabilities(includeCCSMetadata, result, columns, values, false, List.of("like_on_index_fields"));
}

/**
 * Filters the {@code _index} metadata field of a local and a remote index with an {@code RLIKE} list
 * and expects a single row counting the documents of the remote index.
 * Skipped unless {@code rlike_with_list_of_patterns} is reported as supported.
 */
public void testRLikeListIndex() throws Exception {
    assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
    final boolean withCcsMetadata = includeCCSMetadata();
    final String query = """
        FROM test-local-index,*:test-remote-index METADATA _index
        | WHERE _index RLIKE (".*remote.*", ".*not-exist.*")
        | STATS c = COUNT(*) BY _index
        | SORT _index ASC
        """;
    Map<String, Object> response = run(query, withCcsMetadata);

    var expectedColumns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
    var expectedValues = List.of(List.of(remoteDocs.size(), REMOTE_CLUSTER_NAME + ":" + remoteIndex));
    // we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
    List<String> fullResultCapabilities = List.of("like_on_index_fields");
    assertResultMapWithCapabilities(withCcsMetadata, response, expectedColumns, expectedValues, false, fullResultCapabilities);
}

/**
 * Filters the {@code _index} metadata field of a local and a remote index with a {@code NOT RLIKE} list
 * and expects a single row counting the documents of the local index.
 * Skipped unless {@code rlike_with_list_of_patterns} is reported as supported.
 */
public void testNotRLikeListIndex() throws Exception {
    assumeTrue("not supported", capabilitiesSupportedNewAndOld(List.of("rlike_with_list_of_patterns")));
    final boolean withCcsMetadata = includeCCSMetadata();
    final String query = """
        FROM test-local-index,*:test-remote-index METADATA _index
        | WHERE _index NOT RLIKE (".*remote.*", ".*not-exist.*")
        | STATS c = COUNT(*) BY _index
        | SORT _index ASC
        """;
    Map<String, Object> response = run(query, withCcsMetadata);

    var expectedColumns = List.of(Map.of("name", "c", "type", "long"), Map.of("name", "_index", "type", "keyword"));
    var expectedValues = List.of(List.of(localDocs.size(), localIndex));
    // we depend on the code in like_on_index_fields to serialize an ExpressionQueryBuilder
    List<String> fullResultCapabilities = List.of("like_on_index_fields");
    assertResultMapWithCapabilities(withCcsMetadata, response, expectedColumns, expectedValues, false, fullResultCapabilities);
}

private RestClient remoteClusterClient() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,42 @@ public void testLikeList() throws IOException {
testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true);
}

/**
 * Runs a single-pattern {@code rlike} filter through {@code testPushQuery}, passing the Lucene query
 * string and the data-node {@link ComputeSignature} expected for the field {@code type} under test.
 */
public void testRLike() throws IOException {
    // Expected signature: only the keyword and constant_keyword variants are listed as filtering in the query.
    ComputeSignature expectedSignature = switch (type) {
        case KEYWORD, CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
        case SEMANTIC_TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD, AUTO -> ComputeSignature.FILTER_IN_COMPUTE;
    };
    // Expected Lucene query string for each field type.
    String expectedLuceneQuery = switch (type) {
        case KEYWORD -> "test:/%value.*/";
        case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
        case AUTO, CONSTANT_KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*";
    };
    // NOTE(review): "%value" looks like a placeholder that testPushQuery replaces with the value below - confirm.
    String query = """
        FROM test
        | WHERE test rlike "%value.*"
        """;
    String fieldValue = "v".repeat(between(1, 256));
    testPushQuery(fieldValue, query, List.of(expectedLuceneQuery), expectedSignature, true);
}

/**
 * Runs an {@code rlike} filter with a list of two patterns through {@code testPushQuery}, passing the
 * Lucene query string and the data-node {@link ComputeSignature} expected for the field {@code type} under test.
 */
public void testRLikeList() throws IOException {
    // Expected signature: only the keyword and constant_keyword variants are listed as filtering in the query.
    ComputeSignature expectedSignature = switch (type) {
        case KEYWORD, CONSTANT_KEYWORD -> ComputeSignature.FILTER_IN_QUERY;
        case SEMANTIC_TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD, AUTO -> ComputeSignature.FILTER_IN_COMPUTE;
    };
    // Expected Lucene query string for each field type.
    String expectedLuceneQuery = switch (type) {
        case KEYWORD -> "test:RLIKE(\"%value.*\", \"abc.*\"), caseInsensitive=false";
        case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]";
        case AUTO, CONSTANT_KEYWORD, TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*";
    };
    // NOTE(review): "%value" looks like a placeholder that testPushQuery replaces with the value below - confirm.
    String query = """
        FROM test
        | WHERE test rlike ("%value.*", "abc.*")
        """;
    String fieldValue = "v".repeat(between(1, 256));
    testPushQuery(fieldValue, query, List.of(expectedLuceneQuery), expectedSignature, true);
}

enum ComputeSignature {
FILTER_IN_COMPUTE(
matchesList().item("LuceneSourceOperator")
Expand Down
Loading
Loading