Skip to content

Commit 151ea39

Browse files
authored
ESQL: Push down StartsWith and EndsWith functions to Lucene (#123381) (#124582)
Fixes #123067. Just like WildcardLike and RLike, some functions can be converted to Lucene queries. This change does so for StartsWith and EndsWith, which are nearly identical to WildcardLike. Like some other functions, this needs a FoldContext. I'm using the static method for it here; that is fixed in #123398, which I kept separate because it changes many files.
1 parent b934b1d commit 151ea39

File tree

7 files changed

+387
-2
lines changed

7 files changed

+387
-2
lines changed

docs/changelog/123381.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 123381
2+
summary: Push down `StartsWith` and `EndsWith` functions to Lucene
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 123067

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,46 @@ false | null
965965
false | null
966966
;
967967

968+
startsWithLucenePushdown
969+
970+
from hosts
971+
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
972+
| keep host, host_group
973+
| sort host, host_group;
974+
975+
host:keyword | host_group:text
976+
beta | Kubernetes cluster
977+
beta | Kubernetes cluster
978+
beta | Kubernetes cluster
979+
;
980+
981+
startsWithLuceneDisabledPushdown
982+
983+
from hosts
984+
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
985+
| keep host, host_group
986+
| sort host, host_group;
987+
988+
host:keyword | host_group:text
989+
beta | Kubernetes cluster
990+
beta | Kubernetes cluster
991+
beta | Kubernetes cluster
992+
;
993+
994+
startsWithLucenePushdownIgnoreMultivalues
995+
996+
from hosts
997+
| where starts_with(description, "epsilon")
998+
| keep description
999+
| sort description;
1000+
1001+
warning:Line 2:9: evaluation of [starts_with(description, \"epsilon\")] failed, treating result as null. Only first 20 failures recorded.
1002+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
1003+
1004+
description:text
1005+
epsilon gw instance
1006+
;
1007+
9681008
substringOfText
9691009
required_capability: mv_warn
9701010

@@ -1193,6 +1233,138 @@ Bernatsky |false
11931233
;
11941234

11951235

1236+
endsWithLucenePushdown
1237+
1238+
from hosts
1239+
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
1240+
| keep host, host_group
1241+
| sort host, host_group;
1242+
1243+
host:keyword | host_group:text
1244+
beta | Kubernetes cluster
1245+
beta | Kubernetes cluster
1246+
beta | Kubernetes cluster
1247+
;
1248+
1249+
endsWithLuceneDisabledPushdown
1250+
1251+
from hosts
1252+
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
1253+
| keep host, host_group
1254+
| sort host, host_group;
1255+
1256+
host:keyword | host_group:text
1257+
beta | Kubernetes cluster
1258+
beta | Kubernetes cluster
1259+
beta | Kubernetes cluster
1260+
;
1261+
1262+
endsWithLucenePushdownIgnoreMultivalues
1263+
1264+
from hosts
1265+
| where ends_with(description, "host")
1266+
| keep description
1267+
| sort description;
1268+
1269+
warning:Line 2:9: evaluation of [ends_with(description, \"host\")] failed, treating result as null. Only first 20 failures recorded.
1270+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
1271+
1272+
description:text
1273+
;
1274+
1275+
1276+
lucenePushdownMultipleWhere
1277+
1278+
from hosts
1279+
| where starts_with(host, "bet")
1280+
| keep host, host_group
1281+
| sort host, host_group
1282+
| where ends_with(host_group, "cluster");
1283+
1284+
host:keyword | host_group:text
1285+
beta | Kubernetes cluster
1286+
beta | Kubernetes cluster
1287+
beta | Kubernetes cluster
1288+
;
1289+
1290+
lucenePushdownMultipleIndices
1291+
1292+
from airports* metadata _index
1293+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1294+
| keep abbrev, name, _index
1295+
| sort abbrev, name, _index;
1296+
1297+
abbrev:keyword | name:text | _index:keyword
1298+
LUH | Sahnewal | airports
1299+
LUH | Sahnewal | airports_mp
1300+
LUH | Sahnewal | airports_no_doc_values
1301+
LUH | Sahnewal | airports_not_indexed
1302+
LUH | Sahnewal | airports_not_indexed_nor_doc_values
1303+
LUH | Sahnewal | airports_web
1304+
;
1305+
1306+
lucenePushdownOr
1307+
1308+
from airports
1309+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH")
1310+
| keep abbrev, name
1311+
| sort abbrev, name;
1312+
1313+
abbrev:keyword | name:text
1314+
AUH | Abu Dhabi Int'l
1315+
LUH | Sahnewal
1316+
RUH | King Khalid Int'l
1317+
;
1318+
1319+
lucenePushdownMultipleOr
1320+
1321+
from airports
1322+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH") or starts_with(abbrev, "OOL")
1323+
| keep abbrev, name
1324+
| sort abbrev, name;
1325+
1326+
abbrev:keyword | name:text
1327+
AUH | Abu Dhabi Int'l
1328+
LUH | Sahnewal
1329+
OOL | Gold Coast
1330+
RUH | King Khalid Int'l
1331+
;
1332+
1333+
lucenePushdownMultipleAnd
1334+
1335+
from airports metadata _index
1336+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1337+
| where ends_with(name::keyword, "al")
1338+
| keep abbrev, name, _index
1339+
| sort abbrev, name, _index;
1340+
1341+
abbrev:keyword | name:text | _index:keyword
1342+
LUH | Sahnewal | airports
1343+
;
1344+
1345+
lucenePushdownMixAndOr
1346+
1347+
from airports
1348+
| where starts_with(name::keyword, "Sahn") and (starts_with(name::keyword, "Abc") or ends_with(abbrev, "UH"))
1349+
| keep abbrev, name, scalerank
1350+
| sort abbrev, name;
1351+
1352+
abbrev:keyword | name:text | scalerank:integer
1353+
LUH | Sahnewal | 9
1354+
;
1355+
1356+
lucenePushdownMixOrAnd
1357+
1358+
from airports* metadata _index
1359+
| where starts_with(name::keyword, "Sahn") or (starts_with(abbrev, "G") and ends_with(name::keyword, "Falls Int'l"))
1360+
| where ends_with(_index, "airports")
1361+
| keep abbrev, name, scalerank, _index
1362+
| sort abbrev;
1363+
1364+
abbrev:keyword | name:text | scalerank:integer | _index:keyword
1365+
GTF | Great Falls Int'l | 8 | airports
1366+
LUH | Sahnewal | 9 | airports
1367+
;
11961368

11971369
toLowerRow#[skip:-8.12.99]
11981370
// tag::to_lower[]

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class EndsWith extends EsqlScalarFunction {
43+
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);
3645

3746
private final Expression str;
@@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
129138
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
130139
}
131140

141+
/**
 * Reports whether this {@code ends_with} call can be turned into a Lucene query.
 * That is the case only when {@code str} is accepted by
 * {@code pushdownPredicates.isPushableAttribute} and {@code suffix} is foldable.
 *
 * @param pushdownPredicates the predicates used to decide whether {@code str} is pushable
 * @return {@code true} when both conditions hold, {@code false} otherwise
 */
@Override
142+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
143+
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
144+
}
145+
146+
/**
 * Builds the query equivalent of this {@code ends_with} call: a {@link WildcardQuery}
 * whose pattern is {@code "*"} followed by the folded suffix, escaped with
 * {@link QueryParser#escape}.
 *
 * @param handler used to resolve the name of the field to query
 * @return a wildcard query on the resolved field name
 */
@Override
147+
public Query asQuery(TranslatorHandler handler) {
148+
// Re-check that str is a pushable attribute before building the query.
LucenePushdownPredicates.checkIsPushableAttribute(str);
149+
// For a FieldAttribute, the name is taken from its exactAttribute(); otherwise from str itself.
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
150+
151+
// TODO: Get the real FoldContext here
// Escaping makes characters such as '*', '?' and '\' in the suffix match literally;
// only the leading "*" added here acts as a wildcard.
152+
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));
153+
154+
return new WildcardQuery(source(), fieldName, wildcardQuery);
155+
}
156+
157+
/**
 * Returns the {@code str} argument as the single-value field of this function.
 * NOTE(review): presumably used by the single-value-aware translation to keep the
 * function's single-value semantics when pushed down - confirm against
 * {@code TranslationAware.SingleValueTranslationAware}.
 *
 * @return the {@code str} expression
 */
@Override
158+
public Expression singleValueField() {
159+
return str;
160+
}
161+
132162
Expression str() {
133163
return str;
134164
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class StartsWith extends EsqlScalarFunction {
43+
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3645
Expression.class,
3746
"StartsWith",
@@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
126135
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
127136
}
128137

138+
/**
 * Reports whether this {@code starts_with} call can be turned into a Lucene query.
 * That is the case only when {@code str} is accepted by
 * {@code pushdownPredicates.isPushableAttribute} and {@code prefix} is foldable.
 *
 * @param pushdownPredicates the predicates used to decide whether {@code str} is pushable
 * @return {@code true} when both conditions hold, {@code false} otherwise
 */
@Override
139+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
140+
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
141+
}
142+
143+
/**
 * Builds the query equivalent of this {@code starts_with} call: a {@link WildcardQuery}
 * whose pattern is the folded prefix, escaped with {@link QueryParser#escape},
 * followed by {@code "*"}.
 *
 * @param handler used to resolve the name of the field to query
 * @return a wildcard query on the resolved field name
 */
@Override
144+
public Query asQuery(TranslatorHandler handler) {
145+
// Re-check that str is a pushable attribute before building the query.
LucenePushdownPredicates.checkIsPushableAttribute(str);
146+
// For a FieldAttribute, the name is taken from its exactAttribute(); otherwise from str itself.
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
147+
148+
// TODO: Get the real FoldContext here
// NOTE(review): the escaping is presumably meant to make wildcard metacharacters in the
// prefix match literally, leaving the trailing "*" as the only wildcard - confirm.
149+
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";
150+
151+
return new WildcardQuery(source(), fieldName, wildcardQuery);
152+
}
153+
154+
/**
 * Returns the {@code str} argument as the single-value field of this function.
 * NOTE(review): presumably used by the single-value-aware translation to keep the
 * function's single-value semantics when pushed down - confirm against
 * {@code TranslationAware.SingleValueTranslationAware}.
 *
 * @return the {@code str} expression
 */
@Override
155+
public Expression singleValueField() {
156+
return str;
157+
}
158+
129159
Expression str() {
130160
return str;
131161
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithTests.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,21 @@
1212

1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xpack.esql.core.expression.Expression;
15+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
16+
import org.elasticsearch.xpack.esql.core.expression.Literal;
17+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1518
import org.elasticsearch.xpack.esql.core.tree.Source;
1619
import org.elasticsearch.xpack.esql.core.type.DataType;
20+
import org.elasticsearch.xpack.esql.core.type.EsField;
1721
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
1822
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
23+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
24+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
1925
import org.hamcrest.Matcher;
2026

2127
import java.util.LinkedList;
2228
import java.util.List;
29+
import java.util.Map;
2330
import java.util.function.Supplier;
2431

2532
import static org.hamcrest.Matchers.equalTo;
@@ -98,4 +105,38 @@ private static TestCaseSupplier.TestCase testCase(
98105
protected Expression build(Source source, List<Expression> args) {
99106
return new EndsWith(source, args.get(0), args.get(1));
100107
}
108+
109+
/**
 * When both arguments are literals, so that {@code str} is not a field attribute,
 * the function must report itself as not translatable to a Lucene query.
 */
public void testLuceneQuery_AllLiterals_NonTranslatable() {
110+
var function = new EndsWith(
111+
Source.EMPTY,
112+
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
113+
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
114+
);
115+
116+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
117+
}
118+
119+
/**
 * When the suffix is itself a field attribute rather than a literal,
 * the function must report itself as not translatable to a Lucene query.
 */
public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() {
120+
var function = new EndsWith(
121+
Source.EMPTY,
122+
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
123+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true))
124+
);
125+
126+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
127+
}
128+
129+
/**
 * With a field attribute as {@code str} and a literal suffix, the function must be
 * translatable and must produce a {@link WildcardQuery} whose pattern is {@code "*"}
 * followed by the suffix with {@code *}, {@code ?} and {@code \} backslash-escaped.
 *
 * NOTE(review): despite the "NonFoldableSuffix" in the name, the suffix used here is a
 * {@link Literal}; the name looks like a copy-paste leftover - consider renaming.
 */
public void testLuceneQuery_NonFoldableSuffix_Translatable() {
130+
var function = new EndsWith(
131+
Source.EMPTY,
132+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
133+
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
134+
);
135+
136+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));
137+
138+
var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
139+
140+
// The expected pattern is the literal suffix a*b?c\ with each special character escaped.
assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
141+
}
101142
}

0 commit comments

Comments
 (0)