Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/123381.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 123381
summary: Push down `StarsWith` and `EndsWith` functions to Lucene
area: ES|QL
type: enhancement
issues:
- 123067
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,46 @@ false | null
false | null
;

startsWithLucenePushdown

from hosts
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

startsWithLuceneDisabledPushdown

from hosts
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

startsWithLucenePushdownIgnoreMultivalues

from hosts
| where starts_with(description, "epsilon")
| keep description
| sort description;

warning:Line 2:9: evaluation of [starts_with(description, \"epsilon\")] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value

description:text
epsilon gw instance
;

substringOfText

from hosts | where host=="epsilon" | eval l1 = substring(host_group, 0, 5), l2 = substring(description, 0, 5) | keep l1, l2;
Expand Down Expand Up @@ -1199,6 +1239,45 @@ Bernatsky |false
;


endsWithLucenePushdown

from hosts
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

endsWithLuceneDisabledPushdown

from hosts
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
| keep host, host_group
| sort host, host_group;

host:keyword | host_group:text
beta | Kubernetes cluster
beta | Kubernetes cluster
beta | Kubernetes cluster
;

endsWithLucenePushdownIgnoreMultivalues

from hosts
| where ends_with(description, "host")
| keep description
| sort description;

warning:Line 2:9: evaluation of [ends_with(description, \"host\")] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value

description:text
;


toLowerRow#[skip:-8.12.99]
// tag::to_lower[]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,20 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand All @@ -22,6 +29,8 @@
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -31,7 +40,7 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;

public class EndsWith extends EsqlScalarFunction {
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);

private final Expression str;
Expand Down Expand Up @@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
}

@Override
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
}

@Override
public Query asQuery(TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

// TODO: Get the real FoldContext here
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));

return new WildcardQuery(source(), fieldName, wildcardQuery);
}

@Override
public Expression singleValueField() {
return str;
}

Expression str() {
return str;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,20 @@

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
Expand All @@ -22,6 +29,8 @@
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -31,7 +40,7 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;

public class StartsWith extends EsqlScalarFunction {
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"StartsWith",
Expand Down Expand Up @@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
}

@Override
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
}

@Override
public Query asQuery(TranslatorHandler handler) {
LucenePushdownPredicates.checkIsPushableAttribute(str);
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);

// TODO: Get the real FoldContext here
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";

return new WildcardQuery(source(), fieldName, wildcardQuery);
}

@Override
public Expression singleValueField() {
return str;
}

Expression str() {
return str;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
import org.hamcrest.Matcher;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -98,4 +105,38 @@ private static TestCaseSupplier.TestCase testCase(
protected Expression build(Source source, List<Expression> args) {
return new EndsWith(source, args.get(0), args.get(1));
}

public void testLuceneQuery_AllLiterals_NonTranslatable() {
var function = new EndsWith(
Source.EMPTY,
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() {
var function = new EndsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true))
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldableSuffix_Translatable() {
var function = new EndsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));

var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);

assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,20 @@

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -58,4 +65,38 @@ public static Iterable<Object[]> parameters() {
protected Expression build(Source source, List<Expression> args) {
return new StartsWith(source, args.get(0), args.get(1));
}

public void testLuceneQuery_AllLiterals_NonTranslatable() {
var function = new StartsWith(
Source.EMPTY,
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldablePrefix_NonTranslatable() {
var function = new StartsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
new FieldAttribute(Source.EMPTY, "field", new EsField("prefix", DataType.KEYWORD, Map.of(), true))
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
}

public void testLuceneQuery_NonFoldablePrefix_Translatable() {
var function = new StartsWith(
Source.EMPTY,
new FieldAttribute(Source.EMPTY, "field", new EsField("prefix", DataType.KEYWORD, Map.of(), true)),
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
);

assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));

var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);

assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "a\\*b\\?c\\\\*")));
}
}