Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/123381.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 123381
summary: Push down `StartsWith` and `EndsWith` functions to Lucene
area: ES|QL
type: enhancement
issues:
- 123067
172 changes: 172 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,46 @@ false | null
false | null
;

startsWithLucenePushdown

from hosts
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

startsWithLuceneDisabledPushdown

from hosts
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

startsWithLucenePushdownIgnoreMultivalues

from hosts
| where starts_with(description, "epsilon")
| keep description
| sort description;

warning:Line 2:9: evaluation of [starts_with(description, \"epsilon\")] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value

description:text
epsilon gw instance
;

substringOfText

from hosts | where host=="epsilon" | eval l1 = substring(host_group, 0, 5), l2 = substring(description, 0, 5) | keep l1, l2;
Expand Down Expand Up @@ -1199,6 +1239,138 @@ Bernatsky |false
;


endsWithLucenePushdown

from hosts
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

endsWithLuceneDisabledPushdown

from hosts
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

endsWithLucenePushdownIgnoreMultivalues

from hosts
| where ends_with(description, "host")
| keep description
| sort description;

warning:Line 2:9: evaluation of [ends_with(description, \"host\")] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value

description:text
;


lucenePushdownMultipleWhere

from hosts
| where starts_with(host, "bet")
| keep host, host_group
| sort host, host_group
| where ends_with(host_group, "cluster");

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

lucenePushdownMultipleIndices

from airports* metadata _index
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
| keep abbrev, name, _index
| sort abbrev, name, _index;

abbrev:keyword | name:text | _index:keyword
LUH | Sahnewal | airports
LUH | Sahnewal | airports_mp
LUH | Sahnewal | airports_no_doc_values
LUH | Sahnewal | airports_not_indexed
LUH | Sahnewal | airports_not_indexed_nor_doc_values
LUH | Sahnewal | airports_web
;

lucenePushdownOr

from airports
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH")
| keep abbrev, name
| sort abbrev, name;

abbrev:keyword | name:text
AUH | Abu Dhabi Int'l
LUH | Sahnewal
RUH | King Khalid Int'l
;

lucenePushdownMultipleOr

from airports
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH") or starts_with(abbrev, "OOL")
| keep abbrev, name
| sort abbrev, name;

abbrev:keyword | name:text
AUH | Abu Dhabi Int'l
LUH | Sahnewal
OOL | Gold Coast
RUH | King Khalid Int'l
;

lucenePushdownMultipleAnd

from airports metadata _index
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
| where ends_with(name::keyword, "al")
| keep abbrev, name, _index
| sort abbrev, name, _index;

abbrev:keyword | name:text | _index:keyword
LUH | Sahnewal | airports
;

lucenePushdownMixAndOr

from airports
| where starts_with(name::keyword, "Sahn") and (starts_with(name::keyword, "Abc") or ends_with(abbrev, "UH"))
| keep abbrev, name, scalerank
| sort abbrev, name;

abbrev:keyword | name:text | scalerank:integer
LUH | Sahnewal | 9
;

lucenePushdownMixOrAnd

from airports* metadata _index
| where starts_with(name::keyword, "Sahn") or (starts_with(abbrev, "G") and ends_with(name::keyword, "Falls Int'l"))
| where ends_with(_index, "airports")
| keep abbrev, name, scalerank, _index
| sort abbrev;

abbrev:keyword | name:text | scalerank:integer | _index:keyword
GTF | Great Falls Int'l | 8 | airports
LUH | Sahnewal | 9 | airports
;

toLowerRow#[skip:-8.12.99]
// tag::to_lower[]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,20 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand All @@ -22,6 +29,8 @@
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -31,7 +40,7 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;

public class EndsWith extends EsqlScalarFunction {
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);

private final Expression str;
Expand Down Expand Up @@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
}

@Override
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
}

@Override
public Query asQuery(TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

// TODO: Get the real FoldContext here
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));

return new WildcardQuery(source(), fieldName, wildcardQuery);
}

@Override
public Expression singleValueField() {
return str;
}

Expression str() {
return str;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,20 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand All @@ -22,6 +29,8 @@
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -31,7 +40,7 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;

public class StartsWith extends EsqlScalarFunction {
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"StartsWith",
Expand Down Expand Up @@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
}

@Override
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
}

@Override
public Query asQuery(TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

// TODO: Get the real FoldContext here
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";

return new WildcardQuery(source(), fieldName, wildcardQuery);
}

@Override
public Expression singleValueField() {
return str;
}

Expression str() {
return str;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
import org.hamcrest.Matcher;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -98,4 +105,38 @@ private static TestCaseSupplier.TestCase testCase(
protected Expression build(Source source, List<Expression> args) {
return new EndsWith(source, args.get(0), args.get(1));
}

public void testLuceneQuery_AllLiterals_NonTranslatable() {
var function = new EndsWith(
Source.EMPTY,
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() {
var function = new EndsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true))
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldableSuffix_Translatable() {
var function = new EndsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));

var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);

assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
}
}
Loading