From ee5601811c530216030d85bbc54657d534807b75 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Fri, 18 Jul 2025 10:38:39 -0400
Subject: [PATCH 01/44] Initial plumbing for an ES|QL extract_snippets function
---
.../esql/expression/ExpressionWritables.java | 2 +
.../function/EsqlFunctionRegistry.java | 4 +-
.../scalar/string/ExtractSnippets.java | 221 ++++++++++++++++++
3 files changed, 226 insertions(+), 1 deletion(-)
create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java
index 311f666581279..a8b01a749f1a0 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java
@@ -75,6 +75,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax;
import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim;
@@ -223,6 +224,7 @@ public static List unaryScalars() {
entries.add(WildcardLike.ENTRY);
entries.add(WildcardLikeList.ENTRY);
entries.add(Delay.ENTRY);
+ entries.add(ExtractSnippets.ENTRY);
// mv functions
entries.addAll(MvFunctionWritables.getNamedWriteables());
return entries;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
index 0c2629596a9b4..590a40557bc03 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
@@ -159,6 +159,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left;
@@ -491,7 +492,8 @@ private static FunctionDefinition[][] snapshotFunctions() {
def(StGeohex.class, StGeohex::new, "st_geohex"),
def(StGeohexToLong.class, StGeohexToLong::new, "st_geohex_to_long"),
def(StGeohexToString.class, StGeohexToString::new, "st_geohex_to_string"),
- def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine") } };
+ def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"),
+ def(ExtractSnippets.class, ExtractSnippets::new, "extract_snippets") } };
}
public EsqlFunctionRegistry snapshotRegistry() {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
new file mode 100644
index 0000000000000..9066724cece1c
--- /dev/null
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -0,0 +1,221 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.compute.lucene.LuceneQueryEvaluator;
+import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator;
+import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.Example;
+import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
+import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
+import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
+
+/**
+ * ES|QL scalar function that extracts the most relevant snippets from a given input string.
+ */
+public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument {
+ public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
+ Expression.class,
+ "ExtractSnippets",
+ ExtractSnippets::new
+ );
+
+ private static final int DEFAULT_NUM_SNIPPETS = 1;
+ private static final int DEFAULT_SNIPPET_LENGTH = 10; // TODO determine a good default. 512 * 5?
+
+ // TODO better names?
+ private final Expression field, str, numSnippets, snippetLength;
+
+ @FunctionInfo(
+ returnType = "keyword",
+ description = """
+ Extracts the most relevant snippets to return from a given input string""",
+ examples = @Example(file = "keyword", tag = "extract_snippets")
+ )
+ public ExtractSnippets(
+ Source source,
+ @Param(name = "field", type = { "keyword" }, description = "The input string") Expression field,
+ @Param(name = "str", type = { "keyword", "text" }, description = "The input string") Expression str,
+ @Param(
+ optional = true,
+ name = "num_snippets",
+ type = { "integer" },
+ description = "The number of snippets to return. Defaults to " + DEFAULT_NUM_SNIPPETS
+ ) Expression numSnippets,
+ @Param(
+ optional = true,
+ name = "snippet_length",
+ type = { "integer" },
+ description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH
+ ) Expression snippetLength
+ ) {
+ super(source, numSnippets == null ? Collections.singletonList(str) : Arrays.asList(str, numSnippets));
+ this.field = field;
+ this.str = str;
+ this.numSnippets = numSnippets;
+ this.snippetLength = snippetLength;
+ }
+
+ private ExtractSnippets(StreamInput in) throws IOException {
+ this(
+ Source.readFrom((PlanStreamInput) in),
+ in.readNamedWriteable(Expression.class),
+ in.readNamedWriteable(Expression.class),
+ in.readOptionalNamedWriteable(Expression.class),
+ in.readOptionalNamedWriteable(Expression.class)
+ );
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ source().writeTo(out);
+ out.writeNamedWriteable(field);
+ out.writeNamedWriteable(str);
+ out.writeOptionalNamedWriteable(numSnippets);
+ out.writeOptionalNamedWriteable(snippetLength);
+ }
+
+ @Override
+ public String getWriteableName() {
+ return ENTRY.name;
+ }
+
+ @Override
+ public DataType dataType() {
+ return field.dataType().noText();
+ }
+
+ @Override
+ protected TypeResolution resolveType() {
+ if (childrenResolved() == false) {
+ return new TypeResolution("Unresolved children");
+ }
+
+ TypeResolution resolution = isString(field, sourceText(), FIRST);
+ if (resolution.unresolved()) {
+ return resolution;
+ }
+
+ resolution = isString(str, sourceText(), SECOND);
+ if (resolution.unresolved()) {
+ return resolution;
+ }
+
+ resolution = numSnippets == null
+ ? TypeResolution.TYPE_RESOLVED
+ : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer");
+ if (resolution.unresolved()) {
+ return resolution;
+ }
+
+ return snippetLength == null
+ ? TypeResolution.TYPE_RESOLVED
+ : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer");
+ }
+
+ @Override
+ public boolean foldable() {
+ return field.foldable()
+ && str.foldable()
+ && (numSnippets == null || numSnippets.foldable())
+ && (snippetLength == null || snippetLength.foldable());
+ }
+
+ // @Evaluator
+ // static BytesRef process(BytesRef field, BytesRef str, int numSnippets, int snippetLength) {
+ // if (field == null || field.length == 0 || str == null || str.length == 0) {
+ // return null;
+ // }
+ //
+ // String utf8Field = field.utf8ToString();
+ // String utf8Str = str.utf8ToString();
+ // if (snippetLength > utf8Field.length()) {
+ // return field;
+ // }
+ //
+ // // TODO - actually calculate snippets using search string, this truncation is just a placeholder
+ // List snippets = new ArrayList<>(numSnippets);
+ // int pos = 0;
+ // for (int i = 0; i < numSnippets && pos < utf8Field.length(); i++) {
+ // int end = Math.min(pos + snippetLength, utf8Field.length());
+ // String snippet = utf8Field.substring(pos, end);
+ // snippets.add(snippet);
+ // pos += snippetLength;
+ // }
+ // return snippets.get(0);
+ // }
+ //
+ // @Evaluator(extraName = "NoStart")
+ // static BytesRef process(BytesRef field, BytesRef str) {
+ // return process(field, str, DEFAULT_NUM_SNIPPETS, DEFAULT_SNIPPET_LENGTH);
+ // }
+
+ @Override
+ public Expression replaceChildren(List newChildren) {
+ return new ExtractSnippets(
+ source(),
+ newChildren.get(0),
+ newChildren.get(1),
+ numSnippets == null ? null : newChildren.get(1),
+ snippetLength == null ? null : newChildren.get(2)
+ );
+ }
+
+ @Override
+ protected NodeInfo extends Expression> info() {
+ return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength);
+ }
+
+ @Override
+ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
+ List shardContexts = toEvaluator.shardContexts();
+ LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
+ int i = 0;
+ for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
+ shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher());
+ }
+ return new LuceneQueryExpressionEvaluator.Factory(shardConfigs);
+ }
+
+ }
+
+ Expression str() {
+ return str;
+ }
+
+ Expression numSnippets() {
+ return numSnippets;
+ }
+
+ Expression snippetLength() {
+ return snippetLength;
+ }
+}
From eb0a8769550939bce0444692c4e58fc2eaf6b0c5 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 29 Jul 2025 15:05:33 -0400
Subject: [PATCH 02/44] Add HighlighterExpressionEvaluator
---
.../HighlighterExpressionEvaluator.java | 68 ++++++++++++++++++
.../scalar/string/ExtractSnippets.java | 70 +++++++------------
2 files changed, 94 insertions(+), 44 deletions(-)
create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
new file mode 100644
index 0000000000000..0d2dff8bb6f0e
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.lucene;
+
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.data.Vector;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
+
+import java.io.IOException;
+
+public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator
+ implements
+ EvalOperator.ExpressionEvaluator {
+
+ HighlighterExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shardConfigs) {
+ super(blockFactory, shardConfigs);
+ }
+
+ @Override
+ protected ScoreMode scoreMode() {
+ return ScoreMode.COMPLETE;
+ }
+
+ @Override
+ protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
+ return blockFactory.newConstantBytesRefVector(new BytesRef(), size);
+ }
+
+ @Override
+ protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
+ return blockFactory.newBytesRefVectorBuilder(size);
+ }
+
+ @Override
+ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException {
+ // TODO: add snippets here
+ builder.appendBytesRef(new BytesRef("highlighted text")); // Placeholder for actual highlighted text
+ }
+
+ @Override
+ protected void appendNoMatch(BytesRefVector.Builder builder) {
+ // TODO: No-op?
+ }
+
+ @Override
+ public Block eval(Page page) {
+ return executeQuery(page);
+ }
+
+ public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
+ @Override
+ public EvalOperator.ExpressionEvaluator get(DriverContext context) {
+ return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs);
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 9066724cece1c..6369cb7c0e641 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -10,10 +10,13 @@
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator;
import org.elasticsearch.compute.lucene.LuceneQueryEvaluator;
-import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
+import org.elasticsearch.index.query.MatchQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -26,15 +29,16 @@
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
+import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
@@ -119,73 +123,44 @@ protected TypeResolution resolveType() {
return new TypeResolution("Unresolved children");
}
- TypeResolution resolution = isString(field, sourceText(), FIRST);
+ TypeResolution resolution = isString(field(), sourceText(), FIRST);
if (resolution.unresolved()) {
return resolution;
}
- resolution = isString(str, sourceText(), SECOND);
+ resolution = isString(str(), sourceText(), SECOND);
if (resolution.unresolved()) {
return resolution;
}
- resolution = numSnippets == null
+ resolution = numSnippets() == null
? TypeResolution.TYPE_RESOLVED
- : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer");
+ : isType(numSnippets(), dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer");
if (resolution.unresolved()) {
return resolution;
}
- return snippetLength == null
+ return snippetLength() == null
? TypeResolution.TYPE_RESOLVED
- : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer");
+ : isType(snippetLength(), dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer");
}
@Override
public boolean foldable() {
- return field.foldable()
- && str.foldable()
- && (numSnippets == null || numSnippets.foldable())
- && (snippetLength == null || snippetLength.foldable());
+ return field().foldable()
+ && str().foldable()
+ && (numSnippets() == null || numSnippets().foldable())
+ && (snippetLength() == null || snippetLength().foldable());
}
- // @Evaluator
- // static BytesRef process(BytesRef field, BytesRef str, int numSnippets, int snippetLength) {
- // if (field == null || field.length == 0 || str == null || str.length == 0) {
- // return null;
- // }
- //
- // String utf8Field = field.utf8ToString();
- // String utf8Str = str.utf8ToString();
- // if (snippetLength > utf8Field.length()) {
- // return field;
- // }
- //
- // // TODO - actually calculate snippets using search string, this truncation is just a placeholder
- // List snippets = new ArrayList<>(numSnippets);
- // int pos = 0;
- // for (int i = 0; i < numSnippets && pos < utf8Field.length(); i++) {
- // int end = Math.min(pos + snippetLength, utf8Field.length());
- // String snippet = utf8Field.substring(pos, end);
- // snippets.add(snippet);
- // pos += snippetLength;
- // }
- // return snippets.get(0);
- // }
- //
- // @Evaluator(extraName = "NoStart")
- // static BytesRef process(BytesRef field, BytesRef str) {
- // return process(field, str, DEFAULT_NUM_SNIPPETS, DEFAULT_SNIPPET_LENGTH);
- // }
-
@Override
public Expression replaceChildren(List newChildren) {
return new ExtractSnippets(
source(),
+ field,
newChildren.get(0),
- newChildren.get(1),
numSnippets == null ? null : newChildren.get(1),
- snippetLength == null ? null : newChildren.get(2)
+ snippetLength
);
}
@@ -202,9 +177,16 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher());
}
- return new LuceneQueryExpressionEvaluator.Factory(shardConfigs);
+ return new HighlighterExpressionEvaluator.Factory(shardConfigs);
+
+ }
+
+ private QueryBuilder queryBuilder() {
+ return new MatchQueryBuilder(field.sourceText(), str.sourceText());
}
+ Expression field() {
+ return field;
}
Expression str() {
From 8c0f312226b3c9eb8bae4865bd036ad63ae67442 Mon Sep 17 00:00:00 2001
From: carlosdelest
Date: Wed, 30 Jul 2025 17:40:46 +0200
Subject: [PATCH 03/44] Pair programming session
---
.../xpack/esql/plugin/ExtractSnippetsIT.java | 75 +++++++++++++++++++
.../scalar/string/ExtractSnippets.java | 8 +-
.../xpack/esql/analysis/AnalyzerTests.java | 13 +++-
3 files changed, 91 insertions(+), 5 deletions(-)
create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
new file mode 100644
index 0000000000000..4b5affa2f6fc3
--- /dev/null
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.plugin;
+
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.support.WriteRequest;
+import org.elasticsearch.client.internal.IndicesAdminClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
+import org.junit.Before;
+
+import java.util.List;
+import java.util.function.Consumer;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+
+//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug")
+public class ExtractSnippetsIT extends AbstractEsqlIntegTestCase {
+
+ @Before
+ public void setupIndex() {
+ createAndPopulateIndex(this::ensureYellow);
+ }
+
+ public void testExtractSnippets() {
+ var query = """
+ FROM test
+ | EVAL x = extract_snippets(content, "fox", 1, 10)
+ | KEEP x
+ """;
+
+ try (var resp = run(query)) {
+ assertColumnNames(resp.columns(), List.of("x"));
+ assertColumnTypes(resp.columns(), List.of("integer"));
+ assertValues(resp.values(), List.of(List.of(1), List.of(6)));
+ }
+ }
+
+
+
+ static void createAndPopulateIndex(Consumer ensureYellow) {
+ var indexName = "test";
+ var client = client().admin().indices();
+ var createRequest = client.prepareCreate(indexName)
+ .setSettings(Settings.builder().put("index.number_of_shards", 1))
+ .setMapping("id", "type=integer", "content", "type=text");
+ assertAcked(createRequest);
+ client().prepareBulk()
+ .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox"))
+ .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog"))
+ .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown"))
+ .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long"))
+ .add(new IndexRequest(indexName).id("5").source("id", 5, "content", "There is also a white cat"))
+ .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog"))
+ .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
+ .get();
+
+ var lookupIndexName = "test_lookup";
+ createAndPopulateLookupIndex(client, lookupIndexName);
+
+ ensureYellow.accept(new String[] { indexName, lookupIndexName });
+ }
+
+ static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) {
+ var createRequest = client.prepareCreate(lookupIndexName)
+ .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup"))
+ .setMapping("id", "type=integer", "lookup_content", "type=text");
+ assertAcked(createRequest);
+ }
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 6369cb7c0e641..fc13767a01ceb 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -81,7 +81,7 @@ public ExtractSnippets(
description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH
) Expression snippetLength
) {
- super(source, numSnippets == null ? Collections.singletonList(str) : Arrays.asList(str, numSnippets));
+ super(source, List.of(field, str, numSnippets, snippetLength));
this.field = field;
this.str = str;
this.numSnippets = numSnippets;
@@ -157,10 +157,10 @@ && str().foldable()
public Expression replaceChildren(List newChildren) {
return new ExtractSnippets(
source(),
- field,
newChildren.get(0),
- numSnippets == null ? null : newChildren.get(1),
- snippetLength
+ newChildren.get(1),
+ numSnippets == null ? null : newChildren.get(2),
+ newChildren.get(3)
);
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
index 439e10cce27d4..32cfe6daaf92e 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
@@ -17,6 +17,7 @@
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.xpack.esql.EsqlTestUtils;
import org.elasticsearch.xpack.esql.LoadMapping;
import org.elasticsearch.xpack.esql.VerificationException;
@@ -143,7 +144,7 @@
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.startsWith;
-//@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug")
+@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug")
public class AnalyzerTests extends ESTestCase {
private static final UnresolvedRelation UNRESOLVED_RELATION = new UnresolvedRelation(
@@ -2870,6 +2871,16 @@ public void testFromEnrichAndMatchColonUsage() {
assertEquals(esRelation.indexPattern(), "test");
}
+ public void testSnippets() {
+ LogicalPlan plan = analyze("""
+ from test
+ | EVAL x = extract_snippets(first_name, "text", 1, 10)
+ | KEEP x
+ """);
+ var limit = as(plan, Limit.class);
+ var filter = as(limit.child(), Filter.class);
+ }
+
public void testFunctionNamedParamsAsFunctionArgument() {
LogicalPlan plan = analyze("""
from test
From 86dc82ae3efb62268e6e9b8bdfe7d908c15e77f8 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Wed, 30 Jul 2025 14:25:49 -0400
Subject: [PATCH 04/44] Create highlight query
---
.../HighlighterExpressionEvaluator.java | 5 ++-
.../scalar/string/ExtractSnippets.java | 14 +++++--
.../planner/EsPhysicalOperationProviders.java | 42 +++++++++++++++++++
3 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 0d2dff8bb6f0e..f2a4e1d00c0d7 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -46,12 +46,12 @@ protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory,
@Override
protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException {
// TODO: add snippets here
- builder.appendBytesRef(new BytesRef("highlighted text")); // Placeholder for actual highlighted text
+ builder.appendBytesRef(new BytesRef("I am a snippet")); // Placeholder for actual highlighted text
}
@Override
protected void appendNoMatch(BytesRefVector.Builder builder) {
- // TODO: No-op?
+ builder.appendBytesRef(null);
}
@Override
@@ -62,6 +62,7 @@ public Block eval(Page page) {
public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
+ // TODO: Is it possible to add the highlight query here, rather than in ExtractSnippets?
return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs);
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index fc13767a01ceb..e8644e94334c1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -15,6 +15,7 @@
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -157,10 +158,10 @@ && str().foldable()
public Expression replaceChildren(List newChildren) {
return new ExtractSnippets(
source(),
- newChildren.get(0),
- newChildren.get(1),
+ newChildren.get(0), // field
+ newChildren.get(1), // str
numSnippets == null ? null : newChildren.get(2),
- newChildren.get(3)
+ snippetLength == null ? null : newChildren.get(3)
);
}
@@ -175,6 +176,13 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
+ shardContext.addHighlightQuery(
+ field.sourceText(),
+ str.sourceText(),
+ Integer.parseInt(numSnippets.sourceText()),
+ Integer.parseInt(snippetLength.sourceText()),
+ queryBuilder()
+ );
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher());
}
return new HighlighterExpressionEvaluator.Factory(shardConfigs);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
index e0b570267899b..e37670f6bd3fc 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
@@ -52,6 +52,8 @@
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortAndFormats;
@@ -136,6 +138,8 @@ public boolean hasReferences() {
* need one in ten documents.
*/
public abstract double storedFieldsSequentialProportion();
+
+ public abstract void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder);
}
private final List shardContexts;
@@ -498,6 +502,44 @@ public double storedFieldsSequentialProportion() {
return EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.get(ctx.getIndexSettings().getSettings());
}
+ @Override
+ public void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder) {
+ try {
+ // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
+ HighlightBuilder highlightBuilder = new HighlightBuilder();
+ if (queryBuilder != null) {
+ highlightBuilder.highlightQuery(queryBuilder);
+ }
+ // Stripping pre/post tags as they're not useful for snippet creation
+ highlightBuilder.field(field).preTags("").postTags("");
+ // Return highest scoring fragments
+ highlightBuilder.order(HighlightBuilder.Order.SCORE);
+ highlightBuilder.numOfFragments(numSnippets);
+ highlightBuilder.fragmentSize(snippetLength);
+ highlightBuilder.noMatchSize(snippetLength);
+
+ SearchHighlightContext highlightContext = highlightBuilder.build(ctx);
+
+ // Update the active SearchContext with the highlight context
+ if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) {
+ searchContext.highlight(highlightContext);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Failed to create highlight context for field ["
+ + field
+ + "], str ["
+ + str
+ + "], numSnippets: ["
+ + numSnippets
+ + "], snippetLength: ["
+ + snippetLength
+ + "]",
+ e
+ );
+ }
+ }
+
@Override
public void close() {
releasable.close();
From 4f4f157a05368c03cc87cf42410f0682167a349d Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Fri, 1 Aug 2025 13:20:15 -0400
Subject: [PATCH 05/44] Make extract snippets rewriteable
---
.../HighlighterExpressionEvaluator.java | 6 +-
.../esql/capabilities/RewriteableAware.java | 19 ++++++
.../function/EsqlFunctionRegistry.java | 2 +-
.../function/fulltext/FullTextFunction.java | 7 +-
.../fulltext/QueryBuilderResolver.java | 20 +++---
.../scalar/string/ExtractSnippets.java | 66 +++++++++++++++----
6 files changed, 91 insertions(+), 29 deletions(-)
create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index f2a4e1d00c0d7..aeaff93ac0a53 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -51,7 +51,8 @@ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) thro
@Override
protected void appendNoMatch(BytesRefVector.Builder builder) {
- builder.appendBytesRef(null);
+ // NOTE: Carlos originally suggested that we add null here, but that doesn't work - errors on missing key
+ builder.appendBytesRef(new BytesRef());
}
@Override
@@ -62,7 +63,8 @@ public Block eval(Page page) {
public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
- // TODO: Is it possible to add the highlight queyr here, rather than in ExtractSnippets?
+ // TODO: Is it possible to add the highlight query here, rather than in ExtractSnippets? Would require ShardConfig having access
+ // to context
return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs);
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
new file mode 100644
index 0000000000000..4c4a34b60e46e
--- /dev/null
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.capabilities;
+
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+
+public interface RewriteableAware {
+
+ QueryBuilder queryBuilder();
+
+ Expression replaceQueryBuilder(QueryBuilder queryBuilder);
+
+}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
index 590a40557bc03..45f01a0face86 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
@@ -493,7 +493,7 @@ private static FunctionDefinition[][] snapshotFunctions() {
def(StGeohexToLong.class, StGeohexToLong::new, "st_geohex_to_long"),
def(StGeohexToString.class, StGeohexToString::new, "st_geohex_to_string"),
def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"),
- def(ExtractSnippets.class, ExtractSnippets::new, "extract_snippets") } };
+ def(ExtractSnippets.class, quad(ExtractSnippets::new), "extract_snippets") } };
}
public EsqlFunctionRegistry snapshotRegistry() {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
index b5378db783f46..107bb4c14e4f6 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
@@ -17,6 +17,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware;
+import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -68,7 +69,8 @@ public abstract class FullTextFunction extends Function
TranslationAware,
PostAnalysisPlanVerificationAware,
EvaluatorMapper,
- ExpressionScoreMapper {
+ ExpressionScoreMapper,
+ RewriteableAware {
private final Expression query;
private final QueryBuilder queryBuilder;
@@ -163,14 +165,13 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
return queryBuilder != null ? new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler);
}
+ @Override
public QueryBuilder queryBuilder() {
return queryBuilder;
}
protected abstract Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler);
- public abstract Expression replaceQueryBuilder(QueryBuilder queryBuilder);
-
@Override
public BiConsumer postAnalysisPlanVerification() {
return FullTextFunction::checkFullTextQueryFunctions;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index ef3828a3f2fbb..9267d039aaf91 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -13,6 +13,7 @@
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.xpack.esql.core.util.Holder;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -35,14 +36,15 @@ public final class QueryBuilderResolver {
private QueryBuilderResolver() {}
public static void resolveQueryBuilders(LogicalPlan plan, TransportActionServices services, ActionListener listener) {
- var hasFullTextFunctions = plan.anyMatch(p -> {
- Holder hasFullTextFunction = new Holder<>(false);
- p.forEachExpression(FullTextFunction.class, unused -> hasFullTextFunction.set(true));
- return hasFullTextFunction.get();
+ var hasRewriteableAwareFunctions = plan.anyMatch(p -> {
+ Holder hasRewriteable = new Holder<>(false);
+ p.forEachExpression(FullTextFunction.class, unused -> hasRewriteable.set(true));
+ p.forEachExpression(ExtractSnippets.class, unused -> hasRewriteable.set(true));
+ return hasRewriteable.get();
});
- if (hasFullTextFunctions) {
+ if (hasRewriteableAwareFunctions) {
Rewriteable.rewriteAndFetch(
- new FullTextFunctionsRewritable(plan),
+ new FunctionsRewritable(plan),
queryRewriteContext(services, indexNames(plan)),
listener.delegateFailureAndWrap((l, r) -> l.onResponse(r.plan))
);
@@ -70,9 +72,9 @@ private static Set indexNames(LogicalPlan plan) {
return indexNames;
}
- private record FullTextFunctionsRewritable(LogicalPlan plan) implements Rewriteable {
+ private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable {
@Override
- public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
+ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
Holder exceptionHolder = new Holder<>();
Holder updated = new Holder<>(false);
LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> {
@@ -92,7 +94,7 @@ public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOExc
if (exceptionHolder.get() != null) {
throw exceptionHolder.get();
}
- return updated.get() ? new FullTextFunctionsRewritable(newPlan) : this;
+ return updated.get() ? new FunctionsRewritable(newPlan) : this;
}
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index e8644e94334c1..a163417eebd4d 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -7,6 +7,7 @@
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@@ -15,9 +16,8 @@
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
+import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -25,28 +25,27 @@
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
+import java.util.Objects;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
-import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
/**
* Extract snippets function, that extracts the most relevant snippets from a given input string
*/
-public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument {
+// TODO: Does this also need to implement TranslationAware?
+public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"ExtractSnippets",
@@ -58,6 +57,7 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum
// TODO better names?
private final Expression field, str, numSnippets, snippetLength;
+ private final QueryBuilder queryBuilder;
@FunctionInfo(
returnType = "keyword",
@@ -81,21 +81,34 @@ public ExtractSnippets(
type = { "integer" },
description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH
) Expression snippetLength
+ ) {
+ this(source, field, str, numSnippets, snippetLength, new MatchQueryBuilder(field.sourceText(), str.sourceText()));
+ }
+
+ public ExtractSnippets(
+ Source source,
+ Expression field,
+ Expression str,
+ Expression numSnippets,
+ Expression snippetLength,
+ QueryBuilder queryBuilder
) {
super(source, List.of(field, str, numSnippets, snippetLength));
this.field = field;
this.str = str;
this.numSnippets = numSnippets;
this.snippetLength = snippetLength;
- }
+ this.queryBuilder = queryBuilder;
+ };
- private ExtractSnippets(StreamInput in) throws IOException {
+ public ExtractSnippets(StreamInput in) throws IOException {
this(
Source.readFrom((PlanStreamInput) in),
in.readNamedWriteable(Expression.class),
in.readNamedWriteable(Expression.class),
in.readOptionalNamedWriteable(Expression.class),
- in.readOptionalNamedWriteable(Expression.class)
+ in.readOptionalNamedWriteable(Expression.class),
+ in.readOptionalNamedWriteable(QueryBuilder.class)
);
}
@@ -106,6 +119,7 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeNamedWriteable(str);
out.writeOptionalNamedWriteable(numSnippets);
out.writeOptionalNamedWriteable(snippetLength);
+ out.writeOptionalNamedWriteable(queryBuilder);
}
@Override
@@ -181,16 +195,22 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
str.sourceText(),
Integer.parseInt(numSnippets.sourceText()),
Integer.parseInt(snippetLength.sourceText()),
- queryBuilder()
+ queryBuilder
);
- shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher());
+ shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher());
}
return new HighlighterExpressionEvaluator.Factory(shardConfigs);
}
- private QueryBuilder queryBuilder() {
- return new MatchQueryBuilder(field.sourceText(), str.sourceText());
+ @Override
+ public QueryBuilder queryBuilder() {
+ return queryBuilder;
+ }
+
+ @Override
+ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
+ return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder);
}
Expression field() {
@@ -208,4 +228,22 @@ Expression numSnippets() {
Expression snippetLength() {
return snippetLength;
}
+
+ @Override
+ public boolean equals(Object o) {
+ // Match does not serialize options, as they get included in the query builder. We need to override equals and hashcode to
+ // ignore options when comparing two Match functions
+ if (o == null || getClass() != o.getClass()) return false;
+ ExtractSnippets extractSnippets = (ExtractSnippets) o;
+ return Objects.equals(field(), extractSnippets.field())
+ && Objects.equals(str(), extractSnippets.str())
+ && Objects.equals(numSnippets(), extractSnippets.numSnippets())
+ && Objects.equals(snippetLength(), extractSnippets.snippetLength())
+ && Objects.equals(queryBuilder(), extractSnippets.queryBuilder());
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(field(), str(), numSnippets(), snippetLength(), queryBuilder());
+ }
}
From d68c2e873c12d569bd6e08508bd139373ac42401 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 4 Aug 2025 10:54:09 -0400
Subject: [PATCH 06/44] Add comments from session with Carlos
---
.../fetch/subphase/highlight/HighlightPhase.java | 1 +
.../xpack/esql/capabilities/RewriteableAware.java | 4 ++++
.../function/fulltext/QueryBuilderResolver.java | 1 +
.../function/scalar/string/ExtractSnippets.java | 13 ++++++++++++-
4 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
index cf9e8fbf7ded0..bd0bddea0261d 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
@@ -66,6 +66,7 @@ public void process(HitContext hitContext) throws IOException {
Map> contextBuilders = fieldContext.builders;
for (String field : contextBuilders.keySet()) {
FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext);
+ // TODO create this in ES|QL when processing matches
Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType);
HighlightField highlightField = highlighter.highlight(fieldContext);
if (highlightField != null) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index 4c4a34b60e46e..4cb1610e5945f 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -9,6 +9,10 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
+import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
+import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
+import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery;
public interface RewriteableAware {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index 9267d039aaf91..dd47ceb96d8d6 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -77,6 +77,7 @@ private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable exceptionHolder = new Holder<>();
Holder updated = new Holder<>(false);
+ // TODO this needs to work with any rewriteable aware not just full text function
LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> {
QueryBuilder builder = f.queryBuilder(), initial = builder;
builder = builder == null
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index a163417eebd4d..58ea52ec4ab0d 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -18,6 +18,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -28,7 +29,10 @@
import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
+import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
+import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery;
import java.io.IOException;
import java.util.List;
@@ -44,7 +48,7 @@
/**
* Extract snippets function, that extracts the most relevant snippets from a given input string
*/
-// TODO: Does this also need to implement TranslationAware?
+// TODO: This also needs to implement TranslationAware?
public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
@@ -188,8 +192,10 @@ protected NodeInfo extends Expression> info() {
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
List shardContexts = toEvaluator.shardContexts();
LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
+
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
+ // TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders
shardContext.addHighlightQuery(
field.sourceText(),
str.sourceText(),
@@ -213,6 +219,11 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder);
}
+ @Override
+ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
+ return queryBuilder != null ? new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler);
+ }
+
Expression field() {
return field;
}
From 05711001ad67c3ac0ee5349034467b0b7ee4ce15 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 4 Aug 2025 15:01:31 -0400
Subject: [PATCH 07/44] Make translation aware and get further down the rewrite
cycle (still doesn't completely work yet)
---
.../HighlighterExpressionEvaluator.java | 2 -
.../fulltext/QueryBuilderResolver.java | 40 +++++++++++--------
.../scalar/string/ExtractSnippets.java | 21 ++++++----
3 files changed, 37 insertions(+), 26 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index aeaff93ac0a53..40e5bc9cf6364 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -63,8 +63,6 @@ public Block eval(Page page) {
public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
- // TODO: Is it possible to add the highlight query here, rather than in ExtractSnippets? Would require ShardConfig having access
- // to context
return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs);
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index dd47ceb96d8d6..bfdf14fd3d7ff 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -13,7 +13,9 @@
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.xpack.esql.core.util.Holder;
-import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets;
+import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
+import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -38,8 +40,11 @@ private QueryBuilderResolver() {}
public static void resolveQueryBuilders(LogicalPlan plan, TransportActionServices services, ActionListener listener) {
var hasRewriteableAwareFunctions = plan.anyMatch(p -> {
Holder hasRewriteable = new Holder<>(false);
- p.forEachExpression(FullTextFunction.class, unused -> hasRewriteable.set(true));
- p.forEachExpression(ExtractSnippets.class, unused -> hasRewriteable.set(true));
+ p.forEachExpression(expr -> {
+ if (expr instanceof RewriteableAware) {
+ hasRewriteable.set(true);
+ }
+ });
return hasRewriteable.get();
});
if (hasRewriteableAwareFunctions) {
@@ -77,20 +82,23 @@ private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable exceptionHolder = new Holder<>();
Holder updated = new Holder<>(false);
- // TODO this needs to work with any rewriteable aware not just full text function
- LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> {
- QueryBuilder builder = f.queryBuilder(), initial = builder;
- builder = builder == null
- ? f.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
- : builder;
- try {
- builder = builder.rewrite(ctx);
- } catch (IOException e) {
- exceptionHolder.setIfAbsent(e);
+ LogicalPlan newPlan = plan.transformExpressionsDown(Expression.class, expr -> {
+ Expression finalExpression = expr;
+ if (expr instanceof RewriteableAware rewriteableAware && expr instanceof TranslationAware translationAware) {
+ QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder;
+ builder = builder == null
+ ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
+ : builder;
+ try {
+ builder = builder.rewrite(ctx);
+ } catch (IOException e) {
+ exceptionHolder.setIfAbsent(e);
+ }
+ var rewritten = builder != initial;
+ updated.set(updated.get() || rewritten);
+ finalExpression = rewritten ? rewriteableAware.replaceQueryBuilder(builder) : finalExpression;
}
- var rewritten = builder != initial;
- updated.set(updated.get() || rewritten);
- return rewritten ? f.replaceQueryBuilder(builder) : f;
+ return finalExpression;
});
if (exceptionHolder.get() != null) {
throw exceptionHolder.get();
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 58ea52ec4ab0d..386be5e99d37c 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -7,16 +7,17 @@
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
-import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator;
import org.elasticsearch.compute.lucene.LuceneQueryEvaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
+import org.elasticsearch.index.query.InterceptedQueryBuilderWrapper;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
+import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -26,7 +27,6 @@
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
import org.elasticsearch.xpack.esql.expression.function.Param;
-import org.elasticsearch.xpack.esql.expression.function.fulltext.Match;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -48,8 +48,7 @@
/**
* Extract snippets function, that extracts the most relevant snippets from a given input string
*/
-// TODO: This also needs to implement TranslationAware?
-public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware {
+public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"ExtractSnippets",
@@ -196,6 +195,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
// TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders
+
shardContext.addHighlightQuery(
field.sourceText(),
str.sourceText(),
@@ -206,7 +206,6 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher());
}
return new HighlighterExpressionEvaluator.Factory(shardConfigs);
-
}
@Override
@@ -219,9 +218,17 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder);
}
+ @Override
+ public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
+ return Translatable.YES;
+ }
+
@Override
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
- return queryBuilder != null ? new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler);
+ if (queryBuilder != null) {
+ return new TranslationAwareExpressionQuery(source(), queryBuilder);
+ }
+ throw new IllegalStateException("Missing queryBuilder");
}
Expression field() {
@@ -242,8 +249,6 @@ Expression snippetLength() {
@Override
public boolean equals(Object o) {
- // Match does not serialize options, as they get included in the query builder. We need to override equals and hashcode to
- // ignore options when comparing two Match functions
if (o == null || getClass() != o.getClass()) return false;
ExtractSnippets extractSnippets = (ExtractSnippets) o;
return Objects.equals(field(), extractSnippets.field())
From 9fe765447a14ee794458c803bb2c97f1a6a77dc1 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 4 Aug 2025 16:15:24 -0400
Subject: [PATCH 08/44] Move building highlight query to extract snippets
---
.../scalar/string/ExtractSnippets.java | 52 ++++++++++++----
.../planner/EsPhysicalOperationProviders.java | 60 ++++++-------------
2 files changed, 60 insertions(+), 52 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 386be5e99d37c..5426d364044fd 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -13,9 +13,12 @@
import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator;
import org.elasticsearch.compute.lucene.LuceneQueryEvaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
-import org.elasticsearch.index.query.InterceptedQueryBuilderWrapper;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
+import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -194,15 +197,44 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
- // TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders
-
- shardContext.addHighlightQuery(
- field.sourceText(),
- str.sourceText(),
- Integer.parseInt(numSnippets.sourceText()),
- Integer.parseInt(snippetLength.sourceText()),
- queryBuilder
- );
+ SearchExecutionContext searchExecutionContext = shardContext.searchExecutionContext();
+ SearchContext searchContext = shardContext.searchContext();
+ if (searchContext == null) {
+ throw new IllegalStateException("Missing search context, cannot extract snippets");
+ }
+
+ try {
+ // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
+ HighlightBuilder highlightBuilder = new HighlightBuilder();
+ if (queryBuilder != null) {
+ highlightBuilder.highlightQuery(queryBuilder);
+ }
+ // Stripping pre/post tags as they're not useful for snippet creation
+ highlightBuilder.field(field.sourceText()).preTags("").postTags("");
+ // Return highest scoring fragments
+ highlightBuilder.order(HighlightBuilder.Order.SCORE);
+ highlightBuilder.numOfFragments(Integer.parseInt(numSnippets.sourceText()));
+ highlightBuilder.fragmentSize(Integer.parseInt(snippetLength.sourceText()));
+ highlightBuilder.noMatchSize(Integer.parseInt(snippetLength.sourceText()));
+
+ SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext);
+ searchContext.highlight(highlightContext);
+
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Failed to create highlight context for field ["
+ + field.sourceText()
+ + "], str ["
+ + str.sourceText()
+ + "], numSnippets: ["
+ + Integer.parseInt(numSnippets.sourceText())
+ + "], snippetLength: ["
+ + Integer.parseInt(snippetLength.sourceText())
+ + "]",
+ e
+ );
+ }
+
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher());
}
return new HighlighterExpressionEvaluator.Factory(shardConfigs);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
index e37670f6bd3fc..9e7549a183a3a 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
@@ -52,9 +52,8 @@
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.search.fetch.StoredFieldsSpec;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
-import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.AliasFilter;
+import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.search.sort.SortBuilder;
@@ -104,6 +103,10 @@ protected void closeInternal() {
}
};
+ public abstract SearchExecutionContext searchExecutionContext();
+
+ public abstract SearchContext searchContext();
+
@Override
public void incRef() {
refCounted.incRef();
@@ -138,8 +141,6 @@ public boolean hasReferences() {
* need one in ten documents.
*/
public abstract double storedFieldsSequentialProportion();
-
- public abstract void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder);
}
private final List shardContexts;
@@ -391,6 +392,19 @@ public DefaultShardContext(int index, Releasable releasable, SearchExecutionCont
this.shardIdentifier = this.ctx.getFullyQualifiedIndex().getName() + ":" + this.ctx.getShardId();
}
+ @Override
+ public SearchExecutionContext searchExecutionContext() {
+ return ctx;
+ }
+
+ @Override
+ public SearchContext searchContext() {
+ if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) {
+ return searchContext;
+ }
+ return null;
+ }
+
@Override
public int index() {
return index;
@@ -502,44 +516,6 @@ public double storedFieldsSequentialProportion() {
return EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.get(ctx.getIndexSettings().getSettings());
}
- @Override
- public void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder) {
- try {
- // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
- HighlightBuilder highlightBuilder = new HighlightBuilder();
- if (queryBuilder != null) {
- highlightBuilder.highlightQuery(queryBuilder);
- }
- // Stripping pre/post tags as they're not useful for snippet creation
- highlightBuilder.field(field).preTags("").postTags("");
- // Return highest scoring fragments
- highlightBuilder.order(HighlightBuilder.Order.SCORE);
- highlightBuilder.numOfFragments(numSnippets);
- highlightBuilder.fragmentSize(snippetLength);
- highlightBuilder.noMatchSize(snippetLength);
-
- SearchHighlightContext highlightContext = highlightBuilder.build(ctx);
-
- // Update the active SearchContext with the highlight context
- if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) {
- searchContext.highlight(highlightContext);
- }
- } catch (IOException e) {
- throw new RuntimeException(
- "Failed to create highlight context for field ["
- + field
- + "], str ["
- + str
- + "], numSnippets: ["
- + numSnippets
- + "], snippetLength: ["
- + snippetLength
- + "]",
- e
- );
- }
- }
-
@Override
public void close() {
releasable.close();
From 8adea5654ae9238707e45eaeac9bbdf67e46a95d Mon Sep 17 00:00:00 2001
From: cdelgado
Date: Mon, 4 Aug 2025 18:05:00 +0200
Subject: [PATCH 09/44] Cherry-pick: Initial incomplete work for creating the
Highlighter in the expression evaluator
---
.../highlight/SearchHighlightContext.java | 10 +-
.../HighlighterExpressionEvaluator.java | 95 +++++++++++++++++--
.../compute/lucene/LuceneQueryEvaluator.java | 30 ++++--
.../LuceneQueryExpressionEvaluator.java | 5 +-
.../lucene/LuceneQueryScoreEvaluator.java | 4 +-
.../LuceneQueryExpressionEvaluatorTests.java | 2 +-
.../LuceneQueryScoreEvaluatorTests.java | 2 +-
7 files changed, 123 insertions(+), 25 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
index 631a75a355abf..c04b52d747132 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
@@ -40,7 +40,7 @@ public static class Field {
private final String field;
private final FieldOptions fieldOptions;
- Field(String field, FieldOptions fieldOptions) {
+ public Field(String field, FieldOptions fieldOptions) {
assert field != null;
assert fieldOptions != null;
this.field = field;
@@ -185,16 +185,16 @@ public Map options() {
return options;
}
- static class Builder {
+ public static class Builder {
private final FieldOptions fieldOptions = new FieldOptions();
- Builder fragmentCharSize(int fragmentCharSize) {
+ public Builder fragmentCharSize(int fragmentCharSize) {
fieldOptions.fragmentCharSize = fragmentCharSize;
return this;
}
- Builder numberOfFragments(int numberOfFragments) {
+ public Builder numberOfFragments(int numberOfFragments) {
fieldOptions.numberOfFragments = numberOfFragments;
return this;
}
@@ -294,7 +294,7 @@ Builder options(Map options) {
return this;
}
- FieldOptions build() {
+ public FieldOptions build() {
return fieldOptions;
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 40e5bc9cf6364..d7e406b7ab75a 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -7,6 +7,8 @@
package org.elasticsearch.compute.lucene;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.util.BytesRef;
@@ -17,15 +19,46 @@
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
+import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.SourceLoader;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.fetch.FetchContext;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.Source;
import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.function.Supplier;
public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator
implements
EvalOperator.ExpressionEvaluator {
- HighlighterExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shardConfigs) {
+ private final String fieldName;
+ private final SearchContext searchContext;
+ private final SourceLoader sourceLoader;
+
+ HighlighterExpressionEvaluator(
+ BlockFactory blockFactory,
+ ShardConfig[] shardConfigs,
+ String fieldName,
+ SearchContext searchContext,
+ SourceLoader sourceLoader
+ ) {
super(blockFactory, shardConfigs);
+ this.fieldName = fieldName;
+ this.searchContext = searchContext;
+ this.sourceLoader = sourceLoader;
}
@Override
@@ -44,15 +77,62 @@ protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory,
}
@Override
- protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException {
- // TODO: add snippets here
- builder.appendBytesRef(new BytesRef("I am a snippet")); // Placeholder for actual highlighted text
+ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException {
+
+ // I was trying to find the way to build the highlighter from the context, but probably we should just build the
+ // CustomUnifiedHighlighter directly so we don't need specific fetch phase classes for this
+ SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
+ optionsBuilder.numberOfFragments(10);
+ optionsBuilder.fragmentCharSize(100);
+ SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
+ FetchContext fetchContext = new FetchContext(searchContext, sourceLoader);
+ MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
+ SearchHit searchHit = new SearchHit(docId);
+ Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
+
+
+ FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(
+ searchHit,
+ leafReaderContext,
+ docId,
+ Map.of(),
+ source,
+ null
+ );
+ FieldHighlightContext highlightContext = new FieldHighlightContext(
+ fieldName,
+ field,
+ fieldType,
+ fetchContext,
+ hitContext,
+ query,
+ Map.of()
+ );
+ Highlighter highlighter = new DefaultHighlighter();
+ HighlightField highlight = highlighter.highlight(highlightContext);
+
+ // Iterate over fragments etc
+ builder.appendBytesRef(new BytesRef(highlight.fragments()[0].bytes().bytes()));
+ }
+
+ private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
+ return () -> {
+ StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet());
+ try {
+ LeafStoredFieldLoader leafRootLoader = rootLoader.getLoader(ctx, null);
+ leafRootLoader.advanceTo(doc);
+ return Source.fromBytes(leafRootLoader.source());
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ };
}
@Override
protected void appendNoMatch(BytesRefVector.Builder builder) {
- // NOTE: Carlos originally suggested that we add null here, but that doesn't work - errors on missing key
- builder.appendBytesRef(new BytesRef());
+
+
}
@Override
@@ -63,7 +143,8 @@ public Block eval(Page page) {
public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
- return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs);
+ // We need to get field name, search context, and source loader. We should be able to remove the source loader by getting the field value
+ return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs, fieldName, searchContext, context.sourceLoader());
}
}
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
index d91df60621fce..e6eae5109f264 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
@@ -17,7 +17,6 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
-import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.DocBlock;
@@ -49,7 +48,7 @@ public abstract class LuceneQueryEvaluator implements
public record ShardConfig(Query query, IndexSearcher searcher) {}
private final BlockFactory blockFactory;
- private final ShardConfig[] shards;
+ protected final ShardConfig[] shards;
private final List perShardState;
@@ -266,8 +265,11 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
min,
max,
scoreBuilder,
+ ctx,
LuceneQueryEvaluator.this::appendNoMatch,
- LuceneQueryEvaluator.this::appendMatch
+ (builder, scorer1, docId, ctc, query) ->
+ LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query),
+ weight.getQuery()
)
) {
bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1);
@@ -308,12 +310,12 @@ private void initScorer(int minDocId) throws IOException {
private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
if (scorer.iterator().docID() == doc) {
- appendMatch(builder, scorer);
+ appendMatch(builder, scorer, doc, ctx, weight.getQuery());
} else if (scorer.iterator().docID() > doc) {
appendNoMatch(builder);
} else {
if (scorer.iterator().advance(doc) == doc) {
- appendMatch(builder, scorer);
+ appendMatch(builder, scorer, doc, ctx, weight.getQuery());
} else {
appendNoMatch(builder);
}
@@ -321,6 +323,11 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
}
}
+ @FunctionalInterface
+ public interface MatchAppender {
+ void accept(T t, U u, int docId, LeafReaderContext leafReaderContext, Query query) throws E;
+ }
+
/**
* Collects matching information for dense range of doc ids. This assumes that
* doc ids are sent to {@link LeafCollector#collect(int)} in ascending order
@@ -329,8 +336,10 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
static class DenseCollector implements LeafCollector, Releasable {
private final U scoreBuilder;
private final int max;
+ private final LeafReaderContext leafReaderContext;
private final Consumer appendNoMatch;
- private final CheckedBiConsumer appendMatch;
+ private final MatchAppender appendMatch;
+ private final Query query;
private Scorable scorer;
int next;
@@ -339,14 +348,17 @@ static class DenseCollector implements LeafCollector,
int min,
int max,
U scoreBuilder,
+ LeafReaderContext leafReaderContext,
Consumer appendNoMatch,
- CheckedBiConsumer appendMatch
+ MatchAppender appendMatch, Query query
) {
this.scoreBuilder = scoreBuilder;
this.max = max;
next = min;
+ this.leafReaderContext = leafReaderContext;
this.appendNoMatch = appendNoMatch;
this.appendMatch = appendMatch;
+ this.query = query;
}
@Override
@@ -359,7 +371,7 @@ public void collect(int doc) throws IOException {
while (next++ < doc) {
appendNoMatch.accept(scoreBuilder);
}
- appendMatch.accept(scoreBuilder, scorer);
+ appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query);
}
public Vector build() {
@@ -397,7 +409,7 @@ public void close() {
/**
* Appends a matching result to a builder created by @link createVectorBuilder}
*/
- protected abstract void appendMatch(T builder, Scorable scorer) throws IOException;
+ protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException;
/**
* Appends a non matching result to a builder created by @link createVectorBuilder}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
index 73eae67819907..9c65d89782b93 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
@@ -7,6 +7,7 @@
package org.elasticsearch.compute.lucene;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
@@ -17,6 +18,7 @@
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.index.mapper.IdLoader;
import java.io.IOException;
@@ -60,7 +62,8 @@ protected void appendNoMatch(BooleanVector.Builder builder) {
}
@Override
- protected void appendMatch(BooleanVector.Builder builder, Scorable scorer) throws IOException {
+ protected void appendMatch(BooleanVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException {
builder.appendBoolean(true);
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
index cc7b9d9878fa1..1b422b4443e5d 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
@@ -7,6 +7,7 @@
package org.elasticsearch.compute.lucene;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
@@ -61,7 +62,8 @@ protected void appendNoMatch(DoubleVector.Builder builder) {
}
@Override
- protected void appendMatch(DoubleVector.Builder builder, Scorable scorer) throws IOException {
+ protected void appendMatch(DoubleVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException {
builder.appendDouble(scorer.score());
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
index ba9e62abb8b35..9a0016f60ba11 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
@@ -28,7 +28,7 @@ protected DenseCollector createDenseCollector(int min, in
max,
blockFactory().newBooleanVectorFixedBuilder(max - min + 1),
b -> b.appendBoolean(false),
- (b, s) -> b.appendBoolean(true)
+ (b, s, d) -> b.appendBoolean(true)
);
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
index 53fa3f775386c..c1797f2e22724 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
@@ -32,7 +32,7 @@ protected LuceneQueryEvaluator.DenseCollector createDenseC
max,
blockFactory().newDoubleVectorFixedBuilder(max - min + 1),
b -> b.appendDouble(NO_MATCH_SCORE),
- (b, s) -> b.appendDouble(s.score())
+ (b, s, d) -> b.appendDouble(s.score())
);
}
From 6be55b470513662bad1b7314dd1e8611b943a95f Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Wed, 6 Aug 2025 11:17:06 -0400
Subject: [PATCH 10/44] Hack in highlighter so it actually produces a response
---
.../highlight/DefaultHighlighter.java | 10 ++-
.../highlight/SearchHighlightContext.java | 8 +--
.../compute/src/main/java/module-info.java | 1 +
.../HighlighterExpressionEvaluator.java | 69 ++++++++++++-------
.../scalar/string/ExtractSnippets.java | 7 +-
5 files changed, 61 insertions(+), 34 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index 3efbcd15140e5..927ad0f5ad434 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -60,9 +60,13 @@ public boolean canHighlight(MappedFieldType fieldType) {
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
@SuppressWarnings("unchecked")
- Map cache = (Map) fieldContext.cache.computeIfAbsent(
+ // Map cache = (Map) fieldContext.cache.computeIfAbsent(
+ // UnifiedHighlighter.class.getName(),
+ // k -> new HashMap<>()
+ // );
+ Map cache = (Map) fieldContext.cache.getOrDefault(
UnifiedHighlighter.class.getName(),
- k -> new HashMap<>()
+ new HashMap<>()
);
if (cache.containsKey(fieldContext.fieldName) == false) {
cache.put(fieldContext.fieldName, buildHighlighter(fieldContext));
@@ -114,7 +118,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings();
- Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html")
+ Encoder encoder = "html".equals(fieldContext.field.fieldOptions().encoder())
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
index c04b52d747132..f06c667a073ba 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
@@ -209,17 +209,17 @@ Builder encoder(String encoder) {
return this;
}
- Builder preTags(String[] preTags) {
+ public Builder preTags(String[] preTags) {
fieldOptions.preTags = preTags;
return this;
}
- Builder postTags(String[] postTags) {
+ public Builder postTags(String[] postTags) {
fieldOptions.postTags = postTags;
return this;
}
- Builder scoreOrdered(boolean scoreOrdered) {
+ public Builder scoreOrdered(boolean scoreOrdered) {
fieldOptions.scoreOrdered = scoreOrdered;
return this;
}
@@ -229,7 +229,7 @@ Builder highlightFilter(boolean highlightFilter) {
return this;
}
- Builder requireFieldMatch(boolean requireFieldMatch) {
+ public Builder requireFieldMatch(boolean requireFieldMatch) {
fieldOptions.requireFieldMatch = requireFieldMatch;
return this;
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java
index f21ed72d7eb21..5504e48d74636 100644
--- a/x-pack/plugin/esql/compute/src/main/java/module-info.java
+++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java
@@ -21,6 +21,7 @@
requires org.elasticsearch.geo;
requires org.elasticsearch.xcore;
requires hppc;
+ requires org.apache.lucene.highlighter;
exports org.elasticsearch.compute;
exports org.elasticsearch.compute.aggregation;
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index d7e406b7ab75a..a15e91709fe4b 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -28,15 +28,18 @@
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.xcontent.Text;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
+import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;
@@ -45,20 +48,23 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator()
);
Highlighter highlighter = new DefaultHighlighter();
HighlightField highlight = highlighter.highlight(highlightContext);
- // Iterate over fragments etc
- builder.appendBytesRef(new BytesRef(highlight.fragments()[0].bytes().bytes()));
+ // TODO: Even when I have 2 fragments coming back, it's only ever returning the first bytes ref vector. Is this the appropriate data
+ // structure?
+ for (Text highlightText : highlight.fragments()) {
+ builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes()));
+ }
}
private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
@@ -131,8 +137,7 @@ private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, in
@Override
protected void appendNoMatch(BytesRefVector.Builder builder) {
-
-
+ // builder.appendBytesRef(new BytesRef());
}
@Override
@@ -140,11 +145,23 @@ public Block eval(Page page) {
return executeQuery(page);
}
- public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory {
+ public record Factory(
+ ShardConfig[] shardConfigs,
+ String fieldName,
+ Integer numFragments,
+ Integer fragmentSize,
+ SearchContext searchContext
+ ) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
- // We need to get field name, search context, and source loader. We should be able to remove the source loader by getting the field value
- return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs, fieldName, searchContext, context.sourceLoader());
+ return new HighlighterExpressionEvaluator(
+ context.blockFactory(),
+ shardConfigs,
+ fieldName,
+ numFragments,
+ fragmentSize,
+ searchContext
+ );
}
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 5426d364044fd..6a2018c61d335 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -237,7 +237,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher());
}
- return new HighlighterExpressionEvaluator.Factory(shardConfigs);
+ // Get field name and search context from the first shard context
+ String fieldNameStr = field.sourceText();
+ int numFragments = numSnippets == null ? DEFAULT_NUM_SNIPPETS : Integer.parseInt(numSnippets.sourceText());
+ int fragmentSize = snippetLength == null ? DEFAULT_SNIPPET_LENGTH : Integer.parseInt(snippetLength.sourceText());
+ SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext();
+ return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numFragments, fragmentSize, firstSearchContext);
}
@Override
From 60e3ce6cb3da4c5f2f75389430b804f4a5200659 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Thu, 7 Aug 2025 20:23:50 +0000
Subject: [PATCH 11/44] [CI] Auto commit changes from spotless
---
.../compute/lucene/LuceneQueryEvaluator.java | 9 +++++----
.../compute/lucene/LuceneQueryExpressionEvaluator.java | 1 -
.../xpack/esql/plugin/ExtractSnippetsIT.java | 2 --
.../xpack/esql/capabilities/RewriteableAware.java | 4 ----
.../function/fulltext/QueryBuilderResolver.java | 2 +-
5 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
index e6eae5109f264..13eb580476cd9 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
@@ -267,8 +267,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
scoreBuilder,
ctx,
LuceneQueryEvaluator.this::appendNoMatch,
- (builder, scorer1, docId, ctc, query) ->
- LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query),
+ (builder, scorer1, docId, ctc, query) -> LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query),
weight.getQuery()
)
) {
@@ -350,7 +349,8 @@ static class DenseCollector implements LeafCollector,
U scoreBuilder,
LeafReaderContext leafReaderContext,
Consumer appendNoMatch,
- MatchAppender appendMatch, Query query
+ MatchAppender appendMatch,
+ Query query
) {
this.scoreBuilder = scoreBuilder;
this.max = max;
@@ -409,7 +409,8 @@ public void close() {
/**
* Appends a matching result to a builder created by @link createVectorBuilder}
*/
- protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException;
+ protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException;
/**
* Appends a non matching result to a builder created by @link createVectorBuilder}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
index 9c65d89782b93..e81cbe3183a9b 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
@@ -18,7 +18,6 @@
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
-import org.elasticsearch.index.mapper.IdLoader;
import java.io.IOException;
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
index 4b5affa2f6fc3..aea176b48e000 100644
--- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
@@ -41,8 +41,6 @@ public void testExtractSnippets() {
}
}
-
-
static void createAndPopulateIndex(Consumer ensureYellow) {
var indexName = "test";
var client = client().admin().indices();
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index 4cb1610e5945f..4c4a34b60e46e 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -9,10 +9,6 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
-import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
-import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
-import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery;
public interface RewriteableAware {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index bfdf14fd3d7ff..159d179e9fd7b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -12,10 +12,10 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
-import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
From b6fb4f3383fc32f69c5d7ab34907b990de40373e Mon Sep 17 00:00:00 2001
From: cdelgado
Date: Mon, 11 Aug 2025 12:20:39 +0200
Subject: [PATCH 12/44] Change LuceneQueryEvaluator to use Blocks instead of
Vectors to make it multivalue aware
---
.../HighlighterExpressionEvaluator.java | 28 ++++++++------
.../compute/lucene/LuceneQueryEvaluator.java | 38 +++++++++----------
.../LuceneQueryExpressionEvaluator.java | 16 ++++----
.../lucene/LuceneQueryScoreEvaluator.java | 15 ++++----
.../lucene/LuceneQueryEvaluatorTests.java | 9 ++---
.../LuceneQueryExpressionEvaluatorTests.java | 17 +++++----
.../LuceneQueryScoreEvaluatorTests.java | 16 ++++----
7 files changed, 73 insertions(+), 66 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index a15e91709fe4b..e7112249d89b6 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -14,9 +14,8 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
-import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
@@ -43,7 +42,7 @@
import java.util.Map;
import java.util.function.Supplier;
-public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator<BytesRefVector.Builder>
+public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator<BytesRefBlock.Builder>
implements
EvalOperator.ExpressionEvaluator {
@@ -73,17 +72,17 @@ protected ScoreMode scoreMode() {
}
@Override
- protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
- return blockFactory.newConstantBytesRefVector(new BytesRef(), size);
+ protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
+ return blockFactory.newConstantNullBlock(size);
}
@Override
- protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
- return blockFactory.newBytesRefVectorBuilder(size * numFragments);
+ protected BytesRefBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+ return blockFactory.newBytesRefBlockBuilder(size * numFragments);
}
@Override
- protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
throws IOException {
// TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes?
@@ -115,11 +114,16 @@ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int
Highlighter highlighter = new DefaultHighlighter();
HighlightField highlight = highlighter.highlight(highlightContext);
- // TODO: Even when I have 2 fragments coming back, it's only ever returning the first bytes ref vector. Is this the appropriate data
- // structure?
+ boolean multivalued = highlight.fragments().length > 1;
+ if (multivalued) {
+ builder.beginPositionEntry();
+ }
for (Text highlightText : highlight.fragments()) {
builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes()));
}
+ if (multivalued) {
+ builder.endPositionEntry();
+ }
}
private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
@@ -136,8 +140,8 @@ private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, in
}
@Override
- protected void appendNoMatch(BytesRefVector.Builder builder) {
- // builder.appendBytesRef(new BytesRef());
+ protected void appendNoMatch(BytesRefBlock.Builder builder) {
+ builder.appendNull();
}
@Override
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
index 13eb580476cd9..ad05d27b8f42d 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
@@ -23,7 +23,6 @@
import org.elasticsearch.compute.data.DocVector;
import org.elasticsearch.compute.data.IntVector;
import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.Releasables;
@@ -43,7 +42,7 @@
* It's much faster to push queries to the {@link LuceneSourceOperator} or the like, but sometimes this isn't possible. So
* this class is here to save the day.
*/
-public abstract class LuceneQueryEvaluator<T extends Vector.Builder> implements Releasable {
+public abstract class LuceneQueryEvaluator<T extends Block.Builder> implements Releasable {
public record ShardConfig(Query query, IndexSearcher searcher) {}
@@ -66,9 +65,9 @@ public Block executeQuery(Page page) {
DocVector docs = (DocVector) block.asVector();
try {
if (docs.singleSegmentNonDecreasing()) {
- return evalSingleSegmentNonDecreasing(docs).asBlock();
+ return evalSingleSegmentNonDecreasing(docs);
} else {
- return evalSlow(docs).asBlock();
+ return evalSlow(docs);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -105,15 +104,15 @@ public Block executeQuery(Page page) {
* common.
*
*/
- private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
+ private Block evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
ShardState shardState = shardState(docs.shards().getInt(0));
SegmentState segmentState = shardState.segmentState(docs.segments().getInt(0));
int min = docs.docs().getInt(0);
int max = docs.docs().getInt(docs.getPositionCount() - 1);
int length = max - min + 1;
- try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
+ try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
if (length == docs.getPositionCount() && length > 1) {
- return segmentState.scoreDense(scoreBuilder, min, max);
+ return segmentState.scoreDense(scoreBuilder, min, max, docs.getPositionCount());
}
return segmentState.scoreSparse(scoreBuilder, docs.docs());
}
@@ -133,13 +132,13 @@ private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException
* the order that the {@link DocVector} came in.
*
*/
- private Vector evalSlow(DocVector docs) throws IOException {
+ private Block evalSlow(DocVector docs) throws IOException {
int[] map = docs.shardSegmentDocMapForwards();
// Clear any state flags from the previous run
int prevShard = -1;
int prevSegment = -1;
SegmentState segmentState = null;
- try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
+ try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
for (int i = 0; i < docs.getPositionCount(); i++) {
int shard = docs.shards().getInt(docs.shards().getInt(map[i]));
int segment = docs.segments().getInt(map[i]);
@@ -155,7 +154,7 @@ private Vector evalSlow(DocVector docs) throws IOException {
segmentState.scoreSingleDocWithScorer(scoreBuilder, docs.docs().getInt(map[i]));
}
}
- try (Vector outOfOrder = scoreBuilder.build()) {
+ try (Block outOfOrder = scoreBuilder.build()) {
return outOfOrder.filter(docs.shardSegmentDocMapBackwards());
}
}
@@ -246,9 +245,9 @@ private SegmentState(Weight weight, LeafReaderContext ctx) {
* Score a range using the {@link BulkScorer}. This should be faster
* than using {@link #scoreSparse} for dense doc ids.
*/
- Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
+ Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOException {
if (noMatch) {
- return createNoMatchVector(blockFactory, max - min + 1);
+ return createNoMatchBlock(blockFactory, max - min + 1);
}
if (bulkScorer == null || // The bulkScorer wasn't initialized
Thread.currentThread() != bulkScorerThread // The bulkScorer was initialized on a different thread
@@ -257,7 +256,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
bulkScorer = weight.bulkScorer(ctx);
if (bulkScorer == null) {
noMatch = true;
- return createNoMatchVector(blockFactory, max - min + 1);
+ return createNoMatchBlock(blockFactory, positionCount);
}
}
try (
@@ -272,6 +271,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
)
) {
bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1);
+ collector.finish();
return collector.build();
}
}
@@ -280,10 +280,10 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
* Score a vector of doc ids using {@link Scorer}. If you have a dense range of
* doc ids it'd be faster to use {@link #scoreDense}.
*/
- Vector scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
+ Block scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
initScorer(docs.getInt(0));
if (noMatch) {
- return createNoMatchVector(blockFactory, docs.getPositionCount());
+ return createNoMatchBlock(blockFactory, docs.getPositionCount());
}
for (int i = 0; i < docs.getPositionCount(); i++) {
scoreSingleDocWithScorer(scoreBuilder, docs.getInt(i));
@@ -332,7 +332,7 @@ public interface MatchAppender {
* doc ids are sent to {@link LeafCollector#collect(int)} in ascending order
* which isn't documented, but @jpountz swears is true.
*/
- static class DenseCollector<U extends Vector.Builder> implements LeafCollector, Releasable {
+ static class DenseCollector<U extends Block.Builder> implements LeafCollector, Releasable {
private final U scoreBuilder;
private final int max;
private final LeafReaderContext leafReaderContext;
@@ -374,7 +374,7 @@ public void collect(int doc) throws IOException {
appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query);
}
- public Vector build() {
+ public Block build() {
return scoreBuilder.build();
}
@@ -399,12 +399,12 @@ public void close() {
/**
* Creates a vector where all positions correspond to elements that don't match the query
*/
- protected abstract Vector createNoMatchVector(BlockFactory blockFactory, int size);
+ protected abstract Block createNoMatchBlock(BlockFactory blockFactory, int size);
/**
* Creates the corresponding vector builder to store the results of evaluating the query
*/
- protected abstract T createVectorBuilder(BlockFactory blockFactory, int size);
+ protected abstract T createBlockBuilder(BlockFactory blockFactory, int size);
/**
* Appends a matching result to a builder created by @link createVectorBuilder}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
index e81cbe3183a9b..c08f9b8822925 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
@@ -13,9 +13,9 @@
import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.BooleanVector;
import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
@@ -27,7 +27,7 @@
* a {@link BooleanVector}.
* @see LuceneQueryScoreEvaluator
*/
-public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanVector.Builder>
+public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanBlock.Builder>
implements
EvalOperator.ExpressionEvaluator {
@@ -46,22 +46,22 @@ protected ScoreMode scoreMode() {
}
@Override
- protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
- return blockFactory.newConstantBooleanVector(false, size);
+ protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
+ return blockFactory.newConstantBooleanBlockWith(false, size);
}
@Override
- protected BooleanVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
- return blockFactory.newBooleanVectorFixedBuilder(size);
+ protected BooleanBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+ return blockFactory.newBooleanBlockBuilder(size);
}
@Override
- protected void appendNoMatch(BooleanVector.Builder builder) {
+ protected void appendNoMatch(BooleanBlock.Builder builder) {
builder.appendBoolean(false);
}
@Override
- protected void appendMatch(BooleanVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
throws IOException {
builder.appendBoolean(true);
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
index 1b422b4443e5d..88b5721a6fdf9 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
@@ -15,7 +15,6 @@
import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.DoubleVector;
import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.ScoreOperator;
@@ -28,7 +27,7 @@
* Elements that don't match will have a score of {@link #NO_MATCH_SCORE}.
* @see LuceneQueryScoreEvaluator
*/
-public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleVector.Builder> implements ScoreOperator.ExpressionScorer {
+public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleBlock.Builder> implements ScoreOperator.ExpressionScorer {
public static final double NO_MATCH_SCORE = 0.0;
@@ -47,22 +46,22 @@ protected ScoreMode scoreMode() {
}
@Override
- protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
- return blockFactory.newConstantDoubleVector(NO_MATCH_SCORE, size);
+ protected DoubleBlock createNoMatchBlock(BlockFactory blockFactory, int size) {
+ return blockFactory.newConstantDoubleBlockWith(NO_MATCH_SCORE, size);
}
@Override
- protected DoubleVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
- return blockFactory.newDoubleVectorFixedBuilder(size);
+ protected DoubleBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+ return blockFactory.newDoubleBlockBuilder(size);
}
@Override
- protected void appendNoMatch(DoubleVector.Builder builder) {
+ protected void appendNoMatch(DoubleBlock.Builder builder) {
builder.appendDouble(NO_MATCH_SCORE);
}
@Override
- protected void appendMatch(DoubleVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
throws IOException {
builder.appendDouble(scorer.score());
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java
index 2ef64623daa74..cc7d25342b6ee 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java
@@ -25,14 +25,13 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.compute.OperatorTests;
+import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
-import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.DocBlock;
import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
import org.elasticsearch.compute.operator.Driver;
import org.elasticsearch.compute.operator.DriverContext;
@@ -59,7 +58,7 @@
/**
* Base class for testing Lucene query evaluators.
*/
-public abstract class LuceneQueryEvaluatorTests<T extends Vector, U extends Vector.Builder> extends ComputeTestCase {
+public abstract class LuceneQueryEvaluatorTests<T extends Block, U extends Block.Builder> extends ComputeTestCase {
private static final String FIELD = "g";
@@ -168,9 +167,9 @@ protected void assertTermsQuery(List results, Set matching, int ex
int matchCount = 0;
for (Page page : results) {
int initialBlockIndex = termsBlockIndex(page);
- BytesRefVector terms = page.getBlock(initialBlockIndex).asVector();
+ BytesRefBlock terms = page.getBlock(initialBlockIndex);
@SuppressWarnings("unchecked")
- T resultVector = (T) page.getBlock(resultsBlockIndex(page)).asVector();
+ T resultVector = (T) page.getBlock(resultsBlockIndex(page));
for (int i = 0; i < page.getPositionCount(); i++) {
BytesRef termAtPosition = terms.getBytesRef(i, new BytesRef());
boolean isMatch = matching.contains(termAtPosition.utf8ToString());
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
index 9a0016f60ba11..f6808962fcdf6 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
@@ -8,8 +8,9 @@
package org.elasticsearch.compute.lucene;
import org.apache.lucene.search.Scorable;
+import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
-import org.elasticsearch.compute.data.BooleanVector;
+import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.lucene.LuceneQueryEvaluator.DenseCollector;
import org.elasticsearch.compute.operator.EvalOperator;
@@ -17,18 +18,20 @@
import static org.hamcrest.Matchers.equalTo;
-public class LuceneQueryExpressionEvaluatorTests extends LuceneQueryEvaluatorTests<BooleanVector, BooleanVector.Builder> {
+public class LuceneQueryExpressionEvaluatorTests extends LuceneQueryEvaluatorTests<BooleanBlock, BooleanBlock.Builder> {
private final boolean useScoring = randomBoolean();
@Override
- protected DenseCollector<BooleanVector.Builder> createDenseCollector(int min, int max) {
+ protected DenseCollector<BooleanBlock.Builder> createDenseCollector(int min, int max) {
return new LuceneQueryEvaluator.DenseCollector<>(
min,
max,
- blockFactory().newBooleanVectorFixedBuilder(max - min + 1),
+ blockFactory().newBooleanBlockBuilder(max - min + 1),
+ null,
b -> b.appendBoolean(false),
- (b, s, d) -> b.appendBoolean(true)
+ (b, s, d, lr, q) -> b.appendBoolean(true),
+ null
);
}
@@ -54,12 +57,12 @@ protected int resultsBlockIndex(Page page) {
}
@Override
- protected void assertCollectedResultMatch(BooleanVector resultVector, int position, boolean isMatch) {
+ protected void assertCollectedResultMatch(BooleanBlock resultVector, int position, boolean isMatch) {
assertThat(resultVector.getBoolean(position), equalTo(isMatch));
}
@Override
- protected void assertTermResultMatch(BooleanVector resultVector, int position, boolean isMatch) {
+ protected void assertTermResultMatch(BooleanBlock resultVector, int position, boolean isMatch) {
assertThat(resultVector.getBoolean(position), equalTo(isMatch));
}
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
index c1797f2e22724..af162db91978f 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
@@ -9,7 +9,7 @@
import org.apache.lucene.search.Scorable;
import org.elasticsearch.compute.data.BlockFactory;
-import org.elasticsearch.compute.data.DoubleVector;
+import org.elasticsearch.compute.data.DoubleBlock;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.Operator;
import org.elasticsearch.compute.operator.ScoreOperator;
@@ -20,19 +20,21 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
-public class LuceneQueryScoreEvaluatorTests extends LuceneQueryEvaluatorTests<DoubleVector, DoubleVector.Builder> {
+public class LuceneQueryScoreEvaluatorTests extends LuceneQueryEvaluatorTests<DoubleBlock, DoubleBlock.Builder> {
private static final float TEST_SCORE = 1.5f;
private static final Double DEFAULT_SCORE = 1.0;
@Override
- protected LuceneQueryEvaluator.DenseCollector<DoubleVector.Builder> createDenseCollector(int min, int max) {
+ protected LuceneQueryEvaluator.DenseCollector<DoubleBlock.Builder> createDenseCollector(int min, int max) {
return new LuceneQueryEvaluator.DenseCollector<>(
min,
max,
- blockFactory().newDoubleVectorFixedBuilder(max - min + 1),
+ blockFactory().newDoubleBlockBuilder(max - min + 1),
+ null,
b -> b.appendDouble(NO_MATCH_SCORE),
- (b, s, d) -> b.appendDouble(s.score())
+ (b, s, d, lr, q) -> b.appendDouble(s.score()),
+ null
);
}
@@ -63,7 +65,7 @@ protected int resultsBlockIndex(Page page) {
}
@Override
- protected void assertCollectedResultMatch(DoubleVector resultVector, int position, boolean isMatch) {
+ protected void assertCollectedResultMatch(DoubleBlock resultVector, int position, boolean isMatch) {
if (isMatch) {
assertThat(resultVector.getDouble(position), equalTo((double) TEST_SCORE));
} else {
@@ -73,7 +75,7 @@ protected void assertCollectedResultMatch(DoubleVector resultVector, int positio
}
@Override
- protected void assertTermResultMatch(DoubleVector resultVector, int position, boolean isMatch) {
+ protected void assertTermResultMatch(DoubleBlock resultVector, int position, boolean isMatch) {
if (isMatch) {
assertThat(resultVector.getDouble(position), greaterThan(DEFAULT_SCORE));
} else {
From f6a807986c080b7ed02efb9e713c9ee21f2a9846 Mon Sep 17 00:00:00 2001
From: cdelgado
Date: Mon, 11 Aug 2025 14:00:20 +0200
Subject: [PATCH 13/44] Add rewritability
---
.../esql/capabilities/RewriteableAware.java | 2 +-
.../scalar/string/ExtractSnippets.java | 28 +++++++++++++++----
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index 4c4a34b60e46e..d4142833298c7 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -10,7 +10,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-public interface RewriteableAware {
+public interface RewriteableAware extends TranslationAware{
QueryBuilder queryBuilder();
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 6a2018c61d335..8116514188a53 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -22,10 +22,13 @@
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.util.Check;
+import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
@@ -35,10 +38,12 @@
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders;
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
+import org.elasticsearch.xpack.esql.querydsl.query.MatchQuery;
import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery;
import java.io.IOException;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
@@ -47,11 +52,13 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
+import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.fieldAsFieldAttribute;
+import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.getNameFromFieldAttribute;
/**
* Extract snippets function, that extracts the most relevant snippets from a given input string
*/
-public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware {
+public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware, EvaluatorMapper {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"ExtractSnippets",
@@ -257,15 +264,24 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
@Override
public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
- return Translatable.YES;
+ // We don't want pushdown for this function, as it is not a filter query
+ return Translatable.NO;
}
@Override
public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
- if (queryBuilder != null) {
- return new TranslationAwareExpressionQuery(source(), queryBuilder);
- }
- throw new IllegalStateException("Missing queryBuilder");
+ return queryBuilder != null
+ ? new TranslationAwareExpressionQuery(source(), queryBuilder())
+ : translate(pushdownPredicates, handler);
+ }
+
+ private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
+ var fieldAttribute = fieldAsFieldAttribute(field());
+ Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument");
+ String fieldName = getNameFromFieldAttribute(fieldAttribute);
+ Object query = str().fold(FoldContext.small());
+ // Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided
+ return new MatchQuery(source(), fieldName, query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true));
}
Expression field() {
From 1ca0b583eb9bad45ff8ff4e370fd80b39a620fc8 Mon Sep 17 00:00:00 2001
From: cdelgado
Date: Mon, 11 Aug 2025 14:12:56 +0200
Subject: [PATCH 14/44] Solve params via fold
---
.../scalar/string/ExtractSnippets.java | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 8116514188a53..6570cda02f87d 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -202,6 +202,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
List shardContexts = toEvaluator.shardContexts();
LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
+ Integer numSnippets = this.numSnippets == null ?DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small());
+ Integer snippedSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small());
+
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
SearchExecutionContext searchExecutionContext = shardContext.searchExecutionContext();
@@ -210,6 +213,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
throw new IllegalStateException("Missing search context, cannot extract snippets");
}
+
try {
// TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
HighlightBuilder highlightBuilder = new HighlightBuilder();
@@ -220,9 +224,10 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
highlightBuilder.field(field.sourceText()).preTags("").postTags("");
// Return highest scoring fragments
highlightBuilder.order(HighlightBuilder.Order.SCORE);
- highlightBuilder.numOfFragments(Integer.parseInt(numSnippets.sourceText()));
- highlightBuilder.fragmentSize(Integer.parseInt(snippetLength.sourceText()));
- highlightBuilder.noMatchSize(Integer.parseInt(snippetLength.sourceText()));
+
+ highlightBuilder.numOfFragments(numSnippets);
+ highlightBuilder.fragmentSize(snippedSize);
+ highlightBuilder.noMatchSize(snippedSize);
SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext);
searchContext.highlight(highlightContext);
@@ -234,9 +239,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
+ "], str ["
+ str.sourceText()
+ "], numSnippets: ["
- + Integer.parseInt(numSnippets.sourceText())
+ + numSnippets
+ "], snippetLength: ["
- + Integer.parseInt(snippetLength.sourceText())
+ + snippetLength
+ "]",
e
);
@@ -246,10 +251,8 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
}
// Get field name and search context from the first shard context
String fieldNameStr = field.sourceText();
- int numFragments = numSnippets == null ? DEFAULT_NUM_SNIPPETS : Integer.parseInt(numSnippets.sourceText());
- int fragmentSize = snippetLength == null ? DEFAULT_SNIPPET_LENGTH : Integer.parseInt(snippetLength.sourceText());
SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext();
- return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numFragments, fragmentSize, firstSearchContext);
+ return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numSnippets, snippedSize, firstSearchContext);
}
@Override
From 34c10f5c80bfc2187646e63db721c451f5246595 Mon Sep 17 00:00:00 2001
From: cdelgado
Date: Mon, 11 Aug 2025 14:13:34 +0200
Subject: [PATCH 15/44] Use SORT to push down the EVAL clause, so it's executed
on local nodes
---
.../org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
index aea176b48e000..1637c2476bd38 100644
--- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java
@@ -31,6 +31,7 @@ public void testExtractSnippets() {
var query = """
FROM test
| EVAL x = extract_snippets(content, "fox", 1, 10)
+ | SORT x
| KEEP x
""";
From 02cebe76df477f85a337e0520ce03e29df915211 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Mon, 11 Aug 2025 19:50:55 +0000
Subject: [PATCH 16/44] [CI] Auto commit changes from spotless
---
.../compute/lucene/LuceneQueryExpressionEvaluator.java | 4 +---
.../compute/lucene/LuceneQueryExpressionEvaluatorTests.java | 1 -
.../xpack/esql/capabilities/RewriteableAware.java | 2 +-
.../expression/function/scalar/string/ExtractSnippets.java | 3 +--
4 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
index c08f9b8822925..c249620060685 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
@@ -27,9 +27,7 @@
* a {@link BooleanVector}.
* @see LuceneQueryScoreEvaluator
*/
-public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator
- implements
- EvalOperator.ExpressionEvaluator {
+public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator {
LuceneQueryExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shards) {
super(blockFactory, shards);
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
index f6808962fcdf6..616679669b46f 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
@@ -8,7 +8,6 @@
package org.elasticsearch.compute.lucene;
import org.apache.lucene.search.Scorable;
-import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.Page;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index d4142833298c7..097bee3a89343 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -10,7 +10,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-public interface RewriteableAware extends TranslationAware{
+public interface RewriteableAware extends TranslationAware {
QueryBuilder queryBuilder();
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 6570cda02f87d..78972373896b2 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -202,7 +202,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
List shardContexts = toEvaluator.shardContexts();
LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
- Integer numSnippets = this.numSnippets == null ?DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small());
+ Integer numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small());
Integer snippedSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small());
int i = 0;
@@ -213,7 +213,6 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
throw new IllegalStateException("Missing search context, cannot extract snippets");
}
-
try {
// TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
HighlightBuilder highlightBuilder = new HighlightBuilder();
From b923a2ec800c40c701974c4750548f27385fd5de Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 12 Aug 2025 14:32:14 -0400
Subject: [PATCH 17/44] Workaround for rewrite
---
.../highlight/DefaultHighlighter.java | 4 --
.../HighlighterExpressionEvaluator.java | 43 ++++++++++++-------
.../fulltext/QueryBuilderResolver.java | 3 +-
.../scalar/string/ExtractSnippets.java | 5 ++-
4 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index 927ad0f5ad434..0424cdfc7e098 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -60,10 +60,6 @@ public boolean canHighlight(MappedFieldType fieldType) {
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
@SuppressWarnings("unchecked")
- // Map cache = (Map) fieldContext.cache.computeIfAbsent(
- // UnifiedHighlighter.class.getName(),
- // k -> new HashMap<>()
- // );
Map cache = (Map) fieldContext.cache.getOrDefault(
UnifiedHighlighter.class.getName(),
new HashMap<>()
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index e7112249d89b6..1c708a6e3a4e3 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -86,6 +86,22 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
throws IOException {
// TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes?
+
+ // Create a source loader for highlighter use
+ SourceLoader sourceLoader = searchContext.newSourceLoader(null);
+ FetchContext fetchContext = new FetchContext(searchContext, sourceLoader);
+ MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
+ SearchHit searchHit = new SearchHit(docId);
+ Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
+ String defaultHighlighter = fieldType.getDefaultHighlighter();
+
+ Highlighter highlighter;
+ // if (SemanticTextHighlighter.NAME.equals(defaultHighlighter)) {
+ // highlighter = new SemanticTextHighlighter();
+ // } else {
+ highlighter = new DefaultHighlighter();
+ // }
+
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS);
optionsBuilder.fragmentCharSize(fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE);
@@ -94,12 +110,6 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
optionsBuilder.requireFieldMatch(false);
optionsBuilder.scoreOrdered(true);
SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
- // Create a source loader for highlighter use
- SourceLoader sourceLoader = searchContext.newSourceLoader(null);
- FetchContext fetchContext = new FetchContext(searchContext, sourceLoader);
- MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
- SearchHit searchHit = new SearchHit(docId);
- Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null);
FieldHighlightContext highlightContext = new FieldHighlightContext(
@@ -111,18 +121,19 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
query,
new HashMap<>()
);
- Highlighter highlighter = new DefaultHighlighter();
HighlightField highlight = highlighter.highlight(highlightContext);
- boolean multivalued = highlight.fragments().length > 1;
- if (multivalued) {
- builder.beginPositionEntry();
- }
- for (Text highlightText : highlight.fragments()) {
- builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes()));
- }
- if (multivalued) {
- builder.endPositionEntry();
+ if (highlight != null) {
+ boolean multivalued = highlight.fragments().length > 1;
+ if (multivalued) {
+ builder.beginPositionEntry();
+ }
+ for (Text highlightText : highlight.fragments()) {
+ builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes()));
+ }
+ if (multivalued) {
+ builder.endPositionEntry();
+ }
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index 159d179e9fd7b..4a33168be0b31 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -90,7 +90,8 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
: builder;
try {
- builder = builder.rewrite(ctx);
+ // builder = builder.rewrite(ctx);
+ builder = Rewriteable.rewrite(builder, ctx);
} catch (IOException e) {
exceptionHolder.setIfAbsent(e);
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 78972373896b2..33aabe4682331 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -15,6 +15,7 @@
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
@@ -217,7 +218,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
// TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
HighlightBuilder highlightBuilder = new HighlightBuilder();
if (queryBuilder != null) {
- highlightBuilder.highlightQuery(queryBuilder);
+ // TODO validate this works and determine why this is not working in query builder resolver
+ QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext);
+ highlightBuilder.highlightQuery(rewritten);
}
// Stripping pre/post tags as they're not useful for snippet creation
highlightBuilder.field(field.sourceText()).preTags("").postTags("");
From 5b9347cf978b5741979ca624f3a0be3af2b9e3a4 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 12 Aug 2025 14:47:47 -0400
Subject: [PATCH 18/44] Make highlighters accessible
---
.../elasticsearch/search/SearchModule.java | 18 ++++++++++++++++
.../highlight/SearchHighlightContext.java | 2 +-
.../HighlighterExpressionEvaluator.java | 21 +++++++++----------
.../xpack/esql/evaluator/EvalMapper.java | 12 +++++++++++
.../evaluator/mapper/EvaluatorMapper.java | 6 ++++++
.../scalar/string/ExtractSnippets.java | 9 +++++++-
.../xpack/esql/plugin/EsqlPlugin.java | 7 ++++++-
7 files changed, 61 insertions(+), 14 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
index 56b203700b362..f93597b0f3e68 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchModule.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -278,6 +278,8 @@
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
*/
public class SearchModule {
+ private static volatile Map staticHighlighters = Map.of();
+
public static final Setting INDICES_MAX_CLAUSE_COUNT_SETTING = Setting.intSetting(
"indices.query.bool.max_clause_count",
4096,
@@ -920,6 +922,20 @@ private static Map setupHighlighters(Settings settings, Lis
return unmodifiableMap(highlighters.getRegistry());
}
+ /**
+ * Sets the static highlighters map for access by other plugins
+ */
+ private static void setStaticHighlighters(Map highlighters) {
+ staticHighlighters = Map.copyOf(highlighters);
+ }
+
+ /**
+ * Gets the static highlighters map for other plugin access
+ */
+ public static Map getStaticHighlighters() {
+ return staticHighlighters;
+ }
+
private void registerScoreFunctions(List plugins) {
// ScriptScoreFunctionBuilder has it own named writable because of a new script_score query
namedWriteables.add(
@@ -1059,6 +1075,8 @@ private void registerFetchSubPhases(List plugins) {
registerFetchSubPhase(new HighlightPhase(highlighters));
registerFetchSubPhase(new FetchScorePhase());
+ setStaticHighlighters(highlighters);
+
FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters);
registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase);
}
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
index f06c667a073ba..111805be5b905 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
@@ -269,7 +269,7 @@ Builder boundaryScannerLocale(Locale boundaryScannerLocale) {
return this;
}
- Builder highlightQuery(Query highlightQuery) {
+ public Builder highlightQuery(Query highlightQuery) {
fieldOptions.highlightQuery = highlightQuery;
return this;
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 1c708a6e3a4e3..122e9ffe7cb1e 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -50,6 +50,7 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters;
HighlighterExpressionEvaluator(
BlockFactory blockFactory,
@@ -57,13 +58,15 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters
) {
super(blockFactory, shardConfigs);
this.fieldName = fieldName;
this.numFragments = numFragments;
this.fragmentLength = fragmentLength;
this.searchContext = searchContext;
+ this.highlighters = highlighters;
}
@Override
@@ -93,14 +96,7 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
SearchHit searchHit = new SearchHit(docId);
Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
- String defaultHighlighter = fieldType.getDefaultHighlighter();
-
- Highlighter highlighter;
- // if (SemanticTextHighlighter.NAME.equals(defaultHighlighter)) {
- // highlighter = new SemanticTextHighlighter();
- // } else {
- highlighter = new DefaultHighlighter();
- // }
+ Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter());
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS);
@@ -109,6 +105,7 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
optionsBuilder.postTags(new String[] { "" });
optionsBuilder.requireFieldMatch(false);
optionsBuilder.scoreOrdered(true);
+ optionsBuilder.highlightQuery(query);
SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null);
@@ -165,7 +162,8 @@ public record Factory(
String fieldName,
Integer numFragments,
Integer fragmentSize,
- SearchContext searchContext
+ SearchContext searchContext,
+ Map highlighters
) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
@@ -175,7 +173,8 @@ public EvalOperator.ExpressionEvaluator get(DriverContext context) {
fieldName,
numFragments,
fragmentSize,
- searchContext
+ searchContext,
+ highlighters
);
}
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
index d054a8cecb072..ac172eb7aa2ab 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
@@ -29,9 +29,16 @@
import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.InsensitiveEqualsMapper;
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.ShardContext;
+import org.elasticsearch.search.SearchModule;
+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter;
+import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.esql.planner.Layout;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
public final class EvalMapper {
@@ -79,6 +86,11 @@ public FoldContext foldCtx() {
public List shardContexts() {
return shardContexts;
}
+
+ @Override
+ public Map highlighters() {
+ return SearchModule.getStaticHighlighters();
+ }
});
}
for (ExpressionMapper em : MAPPERS) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
index a4a17297abc09..3f561f22c4c24 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
@@ -17,6 +17,7 @@
import org.elasticsearch.indices.breaker.AllCircuitBreakerStats;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.indices.breaker.CircuitBreakerStats;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -25,6 +26,7 @@
import org.elasticsearch.xpack.esql.planner.Layout;
import java.util.List;
+import java.util.Map;
import static org.elasticsearch.compute.data.BlockUtils.fromArrayRow;
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
@@ -41,6 +43,10 @@ interface ToEvaluator {
default List shardContexts() {
throw new UnsupportedOperationException("Shard contexts should only be needed for evaluation operations");
}
+
+ default Map highlighters() {
+ throw new UnsupportedOperationException("Highlighters should only be needed for highlight operations");
+ }
}
/**
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 33aabe4682331..9cdaac96d3a34 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -254,7 +254,14 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
// Get field name and search context from the first shard context
String fieldNameStr = field.sourceText();
SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext();
- return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numSnippets, snippedSize, firstSearchContext);
+ return new HighlighterExpressionEvaluator.Factory(
+ shardConfigs,
+ fieldNameStr,
+ numSnippets,
+ snippedSize,
+ firstSearchContext,
+ toEvaluator.highlighters()
+ );
}
@Override
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index f2f5b6b640311..a2aecaec98ac9 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -42,6 +42,9 @@
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.ExtensiblePlugin;
import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.SearchPlugin;
+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestHandler;
import org.elasticsearch.threadpool.ExecutorBuilder;
@@ -82,12 +85,14 @@
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.function.Supplier;
-public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin {
+public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin {
public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled();
public static final String ESQL_WORKER_THREAD_POOL_NAME = "esql_worker";
From 44b1bc45edee6d630d535391d90379add82f1392 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 12 Aug 2025 19:49:49 +0000
Subject: [PATCH 19/44] [CI] Auto commit changes from spotless
---
.../elasticsearch/xpack/esql/evaluator/EvalMapper.java | 8 ++------
.../org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 4 ----
2 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
index ac172eb7aa2ab..642111d5d480b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
@@ -19,6 +19,8 @@
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.Releasables;
+import org.elasticsearch.search.SearchModule;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -29,14 +31,8 @@
import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.InsensitiveEqualsMapper;
import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.ShardContext;
-import org.elasticsearch.search.SearchModule;
-import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
-import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
-import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter;
-import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.esql.planner.Layout;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index a2aecaec98ac9..47e7afc88f643 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -43,8 +43,6 @@
import org.elasticsearch.plugins.ExtensiblePlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.SearchPlugin;
-import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
-import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestHandler;
import org.elasticsearch.threadpool.ExecutorBuilder;
@@ -85,9 +83,7 @@
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.function.Supplier;
From 82412d8c9d183e058a8240c341f224ec0c000dc0 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 12 Aug 2025 15:51:01 -0400
Subject: [PATCH 20/44] Return semantic highlight results
---
.../xpack/inference/highlight/SemanticTextHighlighter.java | 3 +++
1 file changed, 3 insertions(+)
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
index 92333a10c4d08..cfbeb4718a6f6 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
@@ -21,6 +21,7 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType;
@@ -304,6 +305,8 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
public void visitLeaf(Query query) {
if (query instanceof MatchAllDocsQuery) {
queries.add(new MatchAllDocsQuery());
+ } else if (query instanceof ToParentBlockJoinQuery toParentBlockJoinQuery) {
+ queries.add(toParentBlockJoinQuery.getChildQuery());
}
}
});
From d4ba21de604162bb11a5fd0df80c4f7addcc518d Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 12 Aug 2025 20:33:46 +0000
Subject: [PATCH 21/44] [CI] Auto commit changes from spotless
---
.../esql/expression/function/fulltext/FullTextFunction.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
index 79717ffa4ebb8..a9255cb11dba3 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java
@@ -16,8 +16,8 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware;
-import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
+import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
From 932864a010a4f3439855033a0ab96a36a56d79fc Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Wed, 13 Aug 2025 13:54:51 -0400
Subject: [PATCH 22/44] Cleanup
---
.../java/org/elasticsearch/search/SearchHit.java | 2 +-
.../java/org/elasticsearch/search/SearchModule.java | 13 ++-----------
.../subphase/highlight/DefaultHighlighter.java | 4 ++--
.../fetch/subphase/highlight/HighlightPhase.java | 1 -
.../lucene/HighlighterExpressionEvaluator.java | 5 ++++-
.../xpack/esql/capabilities/RewriteableAware.java | 12 ++++++++++++
.../expression/function/EsqlFunctionRegistry.java | 1 -
.../function/fulltext/QueryBuilderResolver.java | 2 +-
.../function/scalar/string/ExtractSnippets.java | 10 +++++-----
.../elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 1 +
10 files changed, 28 insertions(+), 23 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java
index a9c8e01fa32ac..41ba6e2099ff4 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchHit.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java
@@ -123,7 +123,7 @@ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity) {
this(nestedTopDocId, id, nestedIdentity, null);
}
- private SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
+ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
this(
nestedTopDocId,
DEFAULT_SCORE,
diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
index d53a9a44a74b6..0509c9e78eac7 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchModule.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -923,16 +923,6 @@ private static Map setupHighlighters(Settings settings, Lis
return unmodifiableMap(highlighters.getRegistry());
}
- /**
- * Sets the static highlighters map for access by other plugins
- */
- private static void setStaticHighlighters(Map highlighters) {
- staticHighlighters = Map.copyOf(highlighters);
- }
-
- /**
- * Gets the static highlighters map for other plugin access
- */
public static Map getStaticHighlighters() {
return staticHighlighters;
}
@@ -1076,7 +1066,8 @@ private void registerFetchSubPhases(List plugins) {
registerFetchSubPhase(new HighlightPhase(highlighters));
registerFetchSubPhase(new FetchScorePhase());
- setStaticHighlighters(highlighters);
+ // Store highlighters in a static map for other plugins to access
+ staticHighlighters = Map.copyOf(highlighters);
FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters);
registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase);
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index 0424cdfc7e098..9ae3a1349510e 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -60,9 +60,9 @@ public boolean canHighlight(MappedFieldType fieldType) {
@Override
public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
@SuppressWarnings("unchecked")
- Map cache = (Map) fieldContext.cache.getOrDefault(
+ Map cache = (Map) fieldContext.cache.computeIfAbsent(
UnifiedHighlighter.class.getName(),
- new HashMap<>()
+ k -> new HashMap<>()
);
if (cache.containsKey(fieldContext.fieldName) == false) {
cache.put(fieldContext.fieldName, buildHighlighter(fieldContext));
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
index bd0bddea0261d..cf9e8fbf7ded0 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java
@@ -66,7 +66,6 @@ public void process(HitContext hitContext) throws IOException {
Map> contextBuilders = fieldContext.builders;
for (String field : contextBuilders.keySet()) {
FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext);
- // TODO create this in ES|QL when processing matches
Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType);
HighlightField highlightField = highlighter.highlight(fieldContext);
if (highlightField != null) {
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 122e9ffe7cb1e..b8278918ce745 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -42,6 +42,8 @@
import java.util.Map;
import java.util.function.Supplier;
+import static org.elasticsearch.core.RefCounted.ALWAYS_REFERENCED;
+
public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator
implements
EvalOperator.ExpressionEvaluator {
@@ -94,10 +96,11 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
SourceLoader sourceLoader = searchContext.newSourceLoader(null);
FetchContext fetchContext = new FetchContext(searchContext, sourceLoader);
MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
- SearchHit searchHit = new SearchHit(docId);
+ SearchHit searchHit = new SearchHit(docId, null, null, ALWAYS_REFERENCED);
Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter());
+ // TODO: Consolidate these options with the ones built in the text similarity reranker
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS);
optionsBuilder.fragmentCharSize(fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index 097bee3a89343..7841a13b64966 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -10,10 +10,22 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+/**
+ * Defines objects that need to go through the rewrite phase.
+ */
public interface RewriteableAware extends TranslationAware {
+ /**
+ * @return The current active query builder.
+ */
QueryBuilder queryBuilder();
+ /**
+ * Replaces the current query builder with a rewritten iteration. This happens multiple times through the rewrite phase until
+ * the final iteration of the query builder is stored.
+ * @param queryBuilder QueryBuilder
+ * @return Expression defining the active QueryBuilder
+ */
Expression replaceQueryBuilder(QueryBuilder queryBuilder);
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
index 7bb11ae54d967..67637e5db5a53 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
@@ -505,7 +505,6 @@ private static FunctionDefinition[][] snapshotFunctions() {
def(DotProduct.class, DotProduct::new, "v_dot_product"),
def(L1Norm.class, L1Norm::new, "v_l1_norm"),
def(L2Norm.class, L2Norm::new, "v_l2_norm"),
- def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"),
def(ExtractSnippets.class, quad(ExtractSnippets::new), "extract_snippets") } };
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index 4a33168be0b31..ff94211e07963 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -90,7 +90,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
: builder;
try {
- // builder = builder.rewrite(ctx);
+ // TODO: Even when changing this to Rewriteable#rewrite, this still doesn't execute the full rewrite phase. Bug?
builder = Rewriteable.rewrite(builder, ctx);
} catch (IOException e) {
exceptionHolder.setIfAbsent(e);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 9cdaac96d3a34..6a9d5626f076b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -67,9 +67,10 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum
);
private static final int DEFAULT_NUM_SNIPPETS = 1;
- private static final int DEFAULT_SNIPPET_LENGTH = 10; // TODO determine a good default. 512 * 5?
+ // TODO: This default should be in line with the text similarity reranker. Set artificially low here for POC purposes.
+ private static final int DEFAULT_SNIPPET_LENGTH = 10;
- // TODO better names?
+ // TODO: better names?
private final Expression field, str, numSnippets, snippetLength;
private final QueryBuilder queryBuilder;
@@ -218,13 +219,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
// TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
HighlightBuilder highlightBuilder = new HighlightBuilder();
if (queryBuilder != null) {
- // TODO validate this works and determine why this is not working in query builder resolver
+ // TODO: Ideally we'd only need to rewrite in the QueryBuilderResolver, but we need semantic rewrites to happen
+ // on both coordinator and data nodes.
QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext);
highlightBuilder.highlightQuery(rewritten);
}
- // Stripping pre/post tags as they're not useful for snippet creation
highlightBuilder.field(field.sourceText()).preTags("").postTags("");
- // Return highest scoring fragments
highlightBuilder.order(HighlightBuilder.Order.SCORE);
highlightBuilder.numOfFragments(numSnippets);
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index 98e8ef61b4b12..94f293ce0f5d0 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -89,6 +89,7 @@
import java.util.function.Predicate;
import java.util.function.Supplier;
+// NOTE: SearchPlugin required to get access to highlighters
public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin {
public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled();
From 632df21b6dca1b6ebd7962daeccd2d594ae24c99 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Wed, 13 Aug 2025 19:09:12 +0000
Subject: [PATCH 23/44] [CI] Auto commit changes from spotless
---
.../elasticsearch/xpack/esql/capabilities/RewriteableAware.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
index 7841a13b64966..41886573fdbb2 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java
@@ -24,7 +24,7 @@ public interface RewriteableAware extends TranslationAware {
* Replaces the current query builder with a rewritten iteration. This happens multiple times through the rewrite phase until
* the final iteration of the query builder is stored.
* @param queryBuilder QueryBuilder
- * @return Expression defining the active QueryBuilder
+ * @return Expression defining the active QueryBuilder
*/
Expression replaceQueryBuilder(QueryBuilder queryBuilder);
From 838b054c8caef67c471673cd3230d4a084867d1e Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 14 Aug 2025 14:59:00 -0400
Subject: [PATCH 24/44] Move highlighters from EvalMapper to SearchContext
---
.../org/elasticsearch/search/internal/SearchContext.java | 6 ++++++
.../org/elasticsearch/xpack/esql/evaluator/EvalMapper.java | 5 -----
.../xpack/esql/evaluator/mapper/EvaluatorMapper.java | 6 ------
.../expression/function/scalar/string/ExtractSnippets.java | 4 +++-
.../org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 3 +--
5 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
index 7d018a7ef4ba9..cb3ddb7deb5cc 100644
--- a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
+++ b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
@@ -28,6 +28,7 @@
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.search.RescoreDocIds;
import org.elasticsearch.search.SearchExtBuilder;
+import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.SearchContextAggregations;
import org.elasticsearch.search.collapse.CollapseContext;
@@ -40,6 +41,7 @@
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.search.profile.Profilers;
@@ -152,6 +154,10 @@ public final boolean isClosed() {
public abstract void highlight(SearchHighlightContext highlight);
+ public Map highlighters() {
+ return SearchModule.getStaticHighlighters();
+ }
+
public InnerHitsContext innerHits() {
if (innerHitsContext == null) {
innerHitsContext = new InnerHitsContext();
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
index 642111d5d480b..a6b01c34d5817 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
@@ -82,11 +82,6 @@ public FoldContext foldCtx() {
public List shardContexts() {
return shardContexts;
}
-
- @Override
- public Map highlighters() {
- return SearchModule.getStaticHighlighters();
- }
});
}
for (ExpressionMapper em : MAPPERS) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
index 3f561f22c4c24..a4a17297abc09 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java
@@ -17,7 +17,6 @@
import org.elasticsearch.indices.breaker.AllCircuitBreakerStats;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.indices.breaker.CircuitBreakerStats;
-import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -26,7 +25,6 @@
import org.elasticsearch.xpack.esql.planner.Layout;
import java.util.List;
-import java.util.Map;
import static org.elasticsearch.compute.data.BlockUtils.fromArrayRow;
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
@@ -43,10 +41,6 @@ interface ToEvaluator {
default List shardContexts() {
throw new UnsupportedOperationException("Shard contexts should only be needed for evaluation operations");
}
-
- default Map highlighters() {
- throw new UnsupportedOperationException("Highlighters should only be needed for highlight operations");
- }
}
/**
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 6a9d5626f076b..cd53d3d67e86c 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -18,6 +18,7 @@
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
@@ -254,13 +255,14 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
// Get field name and search context from the first shard context
String fieldNameStr = field.sourceText();
SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext();
+ Map highlighters = firstSearchContext == null ? Map.of() : firstSearchContext.highlighters();
return new HighlighterExpressionEvaluator.Factory(
shardConfigs,
fieldNameStr,
numSnippets,
snippedSize,
firstSearchContext,
- toEvaluator.highlighters()
+ highlighters
);
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index 94f293ce0f5d0..5dfa46f369662 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -89,8 +89,7 @@
import java.util.function.Predicate;
import java.util.function.Supplier;
-// NOTE: SearchPlugin required to get access to highlighters
-public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin {
+public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin {
public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled();
public static final String ESQL_WORKER_THREAD_POOL_NAME = "esql_worker";
From 0b0487e2556a7c73117a6ecad52aa7acae4d82c9 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 14 Aug 2025 15:06:00 -0400
Subject: [PATCH 25/44] Update
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
Co-authored-by: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com>
---
.../esql/expression/function/fulltext/QueryBuilderResolver.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index ff94211e07963..22df04eb9e823 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -84,7 +84,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
Holder updated = new Holder<>(false);
LogicalPlan newPlan = plan.transformExpressionsDown(Expression.class, expr -> {
Expression finalExpression = expr;
- if (expr instanceof RewriteableAware rewriteableAware && expr instanceof TranslationAware translationAware) {
+ if (expr instanceof RewriteableAware rewriteableAware) {
QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder;
builder = builder == null
? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
From eee88bec15b31530a88223b12eebb673a0d63054 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Thu, 14 Aug 2025 19:13:30 +0000
Subject: [PATCH 26/44] [CI] Auto commit changes from spotless
---
.../org/elasticsearch/xpack/esql/evaluator/EvalMapper.java | 3 ---
.../expression/function/fulltext/QueryBuilderResolver.java | 1 -
.../java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 1 -
3 files changed, 5 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
index a6b01c34d5817..d054a8cecb072 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java
@@ -19,8 +19,6 @@
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.core.Releasables;
-import org.elasticsearch.search.SearchModule;
-import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
@@ -34,7 +32,6 @@
import org.elasticsearch.xpack.esql.planner.Layout;
import java.util.List;
-import java.util.Map;
public final class EvalMapper {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index 22df04eb9e823..592531b33ac10 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -13,7 +13,6 @@
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.xpack.esql.capabilities.RewriteableAware;
-import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index 5dfa46f369662..776874fbf90f6 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -43,7 +43,6 @@
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.ExtensiblePlugin;
import org.elasticsearch.plugins.Plugin;
-import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestHandler;
import org.elasticsearch.threadpool.ExecutorBuilder;
From 77b44d5de5d4130ac1cefe5a8eac285d68f4c1e5 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 14 Aug 2025 15:12:59 -0400
Subject: [PATCH 27/44] Cleanup how we pull field attributes in extract
snippets
---
.../scalar/string/ExtractSnippets.java | 28 ++++++++++++-------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index cd53d3d67e86c..d25bacb0dedff 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -225,7 +225,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext);
highlightBuilder.highlightQuery(rewritten);
}
- highlightBuilder.field(field.sourceText()).preTags("").postTags("");
+ highlightBuilder.field(fieldName()).preTags("").postTags("");
highlightBuilder.order(HighlightBuilder.Order.SCORE);
highlightBuilder.numOfFragments(numSnippets);
@@ -238,9 +238,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
} catch (IOException e) {
throw new RuntimeException(
"Failed to create highlight context for field ["
- + field.sourceText()
+ + fieldName()
+ "], str ["
- + str.sourceText()
+ + searchString()
+ "], numSnippets: ["
+ numSnippets
+ "], snippetLength: ["
@@ -253,12 +253,11 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher());
}
// Get field name and search context from the first shard context
- String fieldNameStr = field.sourceText();
- SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext();
+ SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.getFirst().searchContext();
Map highlighters = firstSearchContext == null ? Map.of() : firstSearchContext.highlighters();
return new HighlighterExpressionEvaluator.Factory(
shardConfigs,
- fieldNameStr,
+ fieldName(),
numSnippets,
snippedSize,
firstSearchContext,
@@ -290,22 +289,31 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand
}
private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
- var fieldAttribute = fieldAsFieldAttribute(field());
- Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument");
- String fieldName = getNameFromFieldAttribute(fieldAttribute);
Object query = str().fold(FoldContext.small());
// Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided
- return new MatchQuery(source(), fieldName, query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true));
+ return new MatchQuery(source(), fieldName(), query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true));
}
Expression field() {
return field;
}
+ private String fieldName() { // name of the field passed as the first argument; presumably fieldAsFieldAttribute yields null for non-field expressions, which Check.notNull rejects — confirm
+ var fieldAttribute = fieldAsFieldAttribute(field());
+ Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument");
+ return getNameFromFieldAttribute(fieldAttribute);
+ }
+
Expression str() {
return str;
}
+ private String searchString() {
+     // The second argument is the query text (translate() folds it as a constant), not a field, so it
+     // must not be resolved as a field attribute; report its source text in diagnostics instead.
+     return str().sourceText();
+ }
+
Expression numSnippets() {
return numSnippets;
}
From 5ab3c568345c9e2809f8b704796fb127db1ccd90 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 14 Aug 2025 15:30:29 -0400
Subject: [PATCH 28/44] Fix compilation error due to auto-commit suggestion
---
.../esql/expression/function/fulltext/QueryBuilderResolver.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
index 592531b33ac10..cbc2f598abb60 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java
@@ -86,7 +86,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException {
if (expr instanceof RewriteableAware rewriteableAware) {
QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder;
builder = builder == null
- ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
+ ? rewriteableAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder()
: builder;
try {
// TODO: Even when changing this to Rewriteable#rewrite, this still doesn't execute the full rewrite phase. Bug?
From a6a0f11277d1d71e439d2ce0569b7d5045246e66 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 18 Aug 2025 11:05:36 -0400
Subject: [PATCH 29/44] Add queryBuilder to ExtractSnippets#info
---
.../esql/expression/function/scalar/string/ExtractSnippets.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index d25bacb0dedff..10811fa9dd628 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -197,7 +197,7 @@ public Expression replaceChildren(List newChildren) {
@Override
protected NodeInfo extends Expression> info() {
- return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength);
+ return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength, queryBuilder);
}
@Override
From 9c7609c653fbcbaf79440cce1b3ad72a494c29b0 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 18 Aug 2025 11:15:53 -0400
Subject: [PATCH 30/44] Move construction of objects to ctor when possible
---
.../lucene/HighlighterExpressionEvaluator.java | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index b8278918ce745..230e6e36ff752 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -22,6 +22,7 @@
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.SourceLoader;
+import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
@@ -51,8 +52,9 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters;
+ private final FetchContext fetchContext;
+ private final MappedFieldType fieldType;
HighlighterExpressionEvaluator(
BlockFactory blockFactory,
@@ -67,8 +69,16 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator
Date: Mon, 18 Aug 2025 11:39:12 -0400
Subject: [PATCH 31/44] Refactor highlighting logic into util class
---
.../highlight/HighlightSnippetUtils.java | 60 +++++++++++++++++++
.../HighlighterExpressionEvaluator.java | 19 +++---
...nkingRankFeaturePhaseRankShardContext.java | 19 +++---
3 files changed, 76 insertions(+), 22 deletions(-)
create mode 100644 server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
new file mode 100644
index 0000000000000..bb7cf4ba0e675
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.SearchExecutionContext;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Static helpers that build the highlight contexts (search-level and single-field) used to extract tag-free text snippets.
+ */
+public class HighlightSnippetUtils {
+ /** Wraps the field-level context for {@code field} in a single-field {@link SearchHighlightContext}; {@code queryBuilder} is converted via {@code toQuery(searchExecutionContext)} with no rewrite step in this method. */
+ public static SearchHighlightContext buildSearchHighlightContextForSnippets(
+ SearchExecutionContext searchExecutionContext,
+ String field,
+ int numSnippets,
+ int snippetCharLength,
+ QueryBuilder queryBuilder
+ ) throws IOException {
+ SearchHighlightContext.Field highlightField = buildFieldHighlightContextForSnippets(
+ searchExecutionContext,
+ field,
+ numSnippets,
+ snippetCharLength,
+ queryBuilder.toQuery(searchExecutionContext)
+ );
+ return new SearchHighlightContext(List.of(highlightField));
+ }
+ /** Builds per-field options: {@code numSnippets} fragments of {@code snippetCharLength} (also the no-match size), score-ordered, empty pre/post tags, no field-match requirement; {@code searchExecutionContext} is not read here. */
+ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets(
+ SearchExecutionContext searchExecutionContext,
+ String fieldName,
+ int numSnippets,
+ int snippetCharLength,
+ Query query
+ ) {
+ SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
+ optionsBuilder.numberOfFragments(numSnippets);
+ optionsBuilder.fragmentCharSize(snippetCharLength);
+ optionsBuilder.noMatchSize(snippetCharLength);
+ optionsBuilder.preTags(new String[] { "" });
+ optionsBuilder.postTags(new String[] { "" });
+ optionsBuilder.requireFieldMatch(false);
+ optionsBuilder.scoreOrdered(true);
+ optionsBuilder.highlightQuery(query);
+ return new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
+ }
+
+}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 230e6e36ff752..625df8768d6ef 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -30,6 +30,7 @@
import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
@@ -106,17 +107,13 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter());
- // TODO: Consolidate these options with the ones built in the text similarity reranker
- SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
- optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS);
- optionsBuilder.fragmentCharSize(fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE);
- optionsBuilder.preTags(new String[] { "" });
- optionsBuilder.postTags(new String[] { "" });
- optionsBuilder.requireFieldMatch(false);
- optionsBuilder.scoreOrdered(true);
- optionsBuilder.highlightQuery(query);
- SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
-
+ SearchHighlightContext.Field field = HighlightSnippetUtils.buildFieldHighlightContextForSnippets(
+ fetchContext.getSearchExecutionContext(),
+ fieldName,
+ numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS,
+ fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE,
+ query
+ );
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null);
FieldHighlightContext highlightContext = new FieldHighlightContext(
fieldName,
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
index 66fb4a366a757..5c3ae35f72ea2 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
@@ -12,8 +12,8 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.rank.RankShardResult;
@@ -73,20 +73,17 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI
public void prepareForFetch(SearchContext context) {
if (snippetRankInput != null) {
try {
- HighlightBuilder highlightBuilder = new HighlightBuilder();
- highlightBuilder.highlightQuery(snippetRankInput.snippetQueryBuilder());
- // Stripping pre/post tags as they're not useful for snippet creation
- highlightBuilder.field(field).preTags("").postTags("");
- // Return highest scoring fragments
- highlightBuilder.order(HighlightBuilder.Order.SCORE);
int numSnippets = snippetRankInput.numSnippets() != null ? snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS;
- highlightBuilder.numOfFragments(numSnippets);
// Rely on the model to determine the fragment size
int tokenSizeLimit = snippetRankInput.tokenSizeLimit();
int fragmentSize = tokenSizeLimit * TOKEN_SIZE_LIMIT_MULTIPLIER;
- highlightBuilder.fragmentSize(fragmentSize);
- highlightBuilder.noMatchSize(fragmentSize);
- SearchHighlightContext searchHighlightContext = highlightBuilder.build(context.getSearchExecutionContext());
+ SearchHighlightContext searchHighlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets(
+ context.getSearchExecutionContext(),
+ field,
+ numSnippets,
+ fragmentSize,
+ snippetRankInput.snippetQueryBuilder()
+ );
context.highlight(searchHighlightContext);
} catch (IOException e) {
throw new RuntimeException("Failed to generate snippet request", e);
From 675e78b2aa05a958bf7270eebf430d227b343e64 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 18 Aug 2025 14:26:04 -0400
Subject: [PATCH 32/44] Fix EsqlNodeSubclassTests#testReplaceChildren
---
.../expression/function/scalar/string/ExtractSnippets.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 10811fa9dd628..fd74e6eda6975 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -191,7 +191,8 @@ public Expression replaceChildren(List newChildren) {
newChildren.get(0), // field
newChildren.get(1), // str
numSnippets == null ? null : newChildren.get(2),
- snippetLength == null ? null : newChildren.get(3)
+ snippetLength == null ? null : newChildren.get(3),
+ queryBuilder
);
}
From d5c9d9141282ac260f2978eaf92571c9766586fa Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Mon, 18 Aug 2025 16:24:24 -0400
Subject: [PATCH 33/44] Start adding CSV tests
---
.../extract-snippets-function.csv-spec | 92 +++++++++++++++++++
.../xpack/esql/action/EsqlCapabilities.java | 7 +-
.../elasticsearch/xpack/esql/CsvTests.java | 4 +
3 files changed, 102 insertions(+), 1 deletion(-)
create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
new file mode 100644
index 0000000000000..db4df965f58ea
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
@@ -0,0 +1,92 @@
+###############################################
+# Tests for ExtractSnippets function
+#
+
+extractSnippetsWithField
+required_capability: extract_snippets_function
+
+// tag::extract-snippets-with-field[]
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 1, 25)
+// end::extract-snippets-with-field[]
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+// tag::extract-snippets-with-field-result[]
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit
+// end::extract-snippets-with-field-result[]
+;
+
+extractMultipleSnippetsWithField
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3, 25)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of The Hobbit, Tolkien's own children, The Hobbit]
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Hobbit, , THE HOBBIT: AN UNEXPECTED, film adaptation of The Hobbit]
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit
+;
+
+
+extractMultipleSnippetsWithFieldMvExpand
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3, 25)
+| MV_EXPAND snippets
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of The Hobbit
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, The Hobbit
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
+;
+
+extractMultipleSnippetsWithSomeNoMatches
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(author, "Faulkner")
+| EVAL snippets = extract_snippets(description, "slavery", 1, 10)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | abolition of slavery
+2713 | William Faulkner | Collected Stories of William Faulkner | null
+2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null
+2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null
+3293 | Danny Faulkner | Universe by Design | null
+;
+
+
+extractSnippetsWithDefaultNumSnippetsAndLength
+required_capability: extract_snippets_function
+
+extractSnippetsCalledMultipleTimes
+required_capability: extract_snippets_function
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index a09a9177203c4..95db01522e748 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -1345,7 +1345,12 @@ public enum Cap {
/**
* Support correct counting of skipped shards.
*/
- CORRECT_SKIPPED_SHARDS_COUNT;
+ CORRECT_SKIPPED_SHARDS_COUNT,
+
+ /**
+ * Support for the EXTRACT_SNIPPETS function.
+ */
+ EXTRACT_SNIPPETS_FUNCTION(Build.current().isSnapshot());
private final boolean enabled;
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
index d149fb012a14b..ca4ba0eff830b 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
@@ -335,6 +335,10 @@ public final void test() throws Throwable {
"CSV tests cannot currently handle multi_match function that depends on Lucene",
testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.capabilityName())
);
+ assumeFalse(
+ "CSV tests cannot currently handle EXTRACT_SNIPPETS",
+ testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.EXTRACT_SNIPPETS_FUNCTION.capabilityName())
+ );
if (Build.current().isSnapshot()) {
assertThat(
From bd369f794f6ce7b01dee5047116f755e3a671d8e Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 19 Aug 2025 14:44:02 -0400
Subject: [PATCH 34/44] Fix initialization error
---
.../extract-snippets-function.csv-spec | 31 ++++++++++++-------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
index db4df965f58ea..d512b1ce72a7e 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
@@ -6,26 +6,40 @@ extractSnippetsWithField
required_capability: extract_snippets_function
// tag::extract-snippets-with-field[]
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+// end::extract-snippets-with-field[]
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 1
+;
+
+// tag::extract-snippets-with-field-result[]
+book_no:keyword | author:text | title:text | snippets:keyword
+1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps the
+// end::extract-snippets-with-field-result[]
+;
+
+extractSnippetsWithMatch
+required_capability: extract_snippets_function
+
FROM books
| WHERE MATCH(description, "hobbit")
| EVAL snippets = extract_snippets(description, "hobbit", 1, 25)
-// end::extract-snippets-with-field[]
| KEEP book_no, author, title, snippets
| SORT book_no
| LIMIT 5
;
-// tag::extract-snippets-with-field-result[]
book_no:keyword | author:text | title:text | snippets:keyword
1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit
2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit
-// end::extract-snippets-with-field-result[]
;
-extractMultipleSnippetsWithField
+extractMultipleSnippetsWithMatch
required_capability: extract_snippets_function
FROM books
@@ -45,7 +59,7 @@ book_no:keyword | author:text | title:text
;
-extractMultipleSnippetsWithFieldMvExpand
+extractMultipleSnippetsWithMatchMvExpand
required_capability: extract_snippets_function
FROM books
@@ -83,10 +97,3 @@ book_no:keyword | author:text | title:tex
2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null
3293 | Danny Faulkner | Universe by Design | null
;
-
-
-extractSnippetsWithDefaultNumSnippetsAndLength
-required_capability: extract_snippets_function
-
-extractSnippetsCalledMultipleTimes
-required_capability: extract_snippets_function
From ccda43d8214b55e85ac9c51eaf7e76980400d077 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 19 Aug 2025 14:58:31 -0400
Subject: [PATCH 35/44] Clean up duplication when creating highlighter
---
.../scalar/string/ExtractSnippets.java | 37 ++++++++-----------
1 file changed, 15 insertions(+), 22 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index fd74e6eda6975..ee35ccfc9a14c 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -17,7 +17,7 @@
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.index.query.SearchExecutionContext;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
@@ -68,10 +68,9 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum
);
private static final int DEFAULT_NUM_SNIPPETS = 1;
- // TODO: This default should be in line with the text similarity reranker. Set artificially low here for POC purposes.
+ // TODO: Determine good default, set artificially low for POC purposes
private static final int DEFAULT_SNIPPET_LENGTH = 10;
- // TODO: better names?
private final Expression field, str, numSnippets, snippetLength;
private final QueryBuilder queryBuilder;
@@ -206,8 +205,8 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
List shardContexts = toEvaluator.shardContexts();
LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()];
- Integer numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small());
- Integer snippedSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small());
+ int numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small());
+ int snippetSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small());
int i = 0;
for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) {
@@ -218,22 +217,16 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
}
try {
- // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch
- HighlightBuilder highlightBuilder = new HighlightBuilder();
- if (queryBuilder != null) {
- // TODO: Ideally we'd only need to rewrite in the QueryBuilderResolver, but we need semantic rewrites to happen
- // on both coordinator and data nodes.
- QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext);
- highlightBuilder.highlightQuery(rewritten);
- }
- highlightBuilder.field(fieldName()).preTags("").postTags("");
- highlightBuilder.order(HighlightBuilder.Order.SCORE);
-
- highlightBuilder.numOfFragments(numSnippets);
- highlightBuilder.fragmentSize(snippedSize);
- highlightBuilder.noMatchSize(snippedSize);
-
- SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext);
+ // We need to call rewrite here, to ensure we rewrite on both coordinator and data nodes.
+ assert queryBuilder != null : "ExtractSnippets missing required state";
+ QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext);
+ SearchHighlightContext highlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets(
+ searchExecutionContext,
+ fieldName(),
+ numSnippets,
+ snippetSize,
+ rewritten
+ );
searchContext.highlight(highlightContext);
} catch (IOException e) {
@@ -260,7 +253,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
shardConfigs,
fieldName(),
numSnippets,
- snippedSize,
+ snippetSize,
firstSearchContext,
highlighters
);
From 35120e68cda4ca7e5b2611ccfcb38bda18e4d0e8 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 19 Aug 2025 15:34:08 -0400
Subject: [PATCH 36/44] Support default parameters when not specified
---
.../scalar/string/ExtractSnippets.java | 25 ++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index ee35ccfc9a14c..159070ddf11c1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -33,8 +33,8 @@
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
-import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
import org.elasticsearch.xpack.esql.expression.function.Param;
+import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
@@ -44,6 +44,7 @@
import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -60,7 +61,12 @@
/**
* Extract snippets function, that extracts the most relevant snippets from a given input string
*/
-public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware, EvaluatorMapper {
+public class ExtractSnippets extends EsqlScalarFunction
+ implements
+ TwoOptionalArguments,
+ RewriteableAware,
+ TranslationAware,
+ EvaluatorMapper {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
"ExtractSnippets",
@@ -108,7 +114,7 @@ public ExtractSnippets(
Expression snippetLength,
QueryBuilder queryBuilder
) {
- super(source, List.of(field, str, numSnippets, snippetLength));
+ super(source, fields(field, str, numSnippets, snippetLength));
this.field = field;
this.str = str;
this.numSnippets = numSnippets;
@@ -331,4 +337,17 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(field(), str(), numSnippets(), snippetLength(), queryBuilder());
}
+
+ private static List<Expression> fields(Expression field, Expression str, Expression numSnippets, Expression snippetLength) {
+ List<Expression> list = new ArrayList<>(4);
+ list.add(field);
+ list.add(str);
+ if (numSnippets != null) { // snippetLength is only appended when numSnippets is present
+ list.add(numSnippets);
+ if (snippetLength != null) {
+ list.add(snippetLength);
+ }
+ }
+ return list;
+ }
}
From de46fef7f19d603e9e25e777c517fa7c2d1eed05 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Tue, 19 Aug 2025 16:35:18 -0400
Subject: [PATCH 37/44] Fix char encoding bug for text fields (not
semantic_text)
---
.../search/fetch/subphase/highlight/HighlightSnippetUtils.java | 3 +++
1 file changed, 3 insertions(+)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
index bb7cf4ba0e675..ffba59dd1c47c 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
@@ -48,6 +48,9 @@ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numSnippets);
optionsBuilder.fragmentCharSize(snippetCharLength);
+ // Note: The default SENTENCE boundary scanner used by the DefaultHighlighter will return fragments larger than the specified
+ // snippetLength. This has implications when appending and calculating ByteArrays, so we specify WORD.
+ optionsBuilder.boundaryScannerType(HighlightBuilder.BoundaryScannerType.WORD);
optionsBuilder.noMatchSize(snippetCharLength);
optionsBuilder.preTags(new String[] { "" });
optionsBuilder.postTags(new String[] { "" });
From 5f20480c0c9d0a66ac3e0fe8bd2d309f3814f865 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Wed, 20 Aug 2025 14:53:23 -0400
Subject: [PATCH 38/44] Truncate snippets that are longer than requested size
---
.../highlight/HighlightSnippetUtils.java | 3 --
.../HighlighterExpressionEvaluator.java | 44 ++++++++++++++++---
2 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
index ffba59dd1c47c..bb7cf4ba0e675 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
@@ -48,9 +48,6 @@ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numSnippets);
optionsBuilder.fragmentCharSize(snippetCharLength);
- // Note: The default SENTENCE boundary scanner used by the DefaultHighlighter will return fragments larger than the specified
- // snippetLength. This has implications when appending and calculating ByteArrays, so we specify WORD.
- optionsBuilder.boundaryScannerType(HighlightBuilder.BoundaryScannerType.WORD);
optionsBuilder.noMatchSize(snippetCharLength);
optionsBuilder.preTags(new String[] { "" });
optionsBuilder.postTags(new String[] { "" });
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 625df8768d6ef..65e9375458299 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -39,6 +39,12 @@
import java.io.IOException;
import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -51,8 +57,8 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters;
private final FetchContext fetchContext;
private final MappedFieldType fieldType;
@@ -68,8 +74,8 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator fragmentLength) {
+ // TODO - This isn't a great solution, but in order to resolve character encoding issues in the
+ // returned BytesRef we need to ensure that the fragment size we return is equal to what was requested.
+ // Since the highlighter's default sentence boundary scanner can return longer fragments, we're truncating for now.
+ byte[] truncatedBytes = truncateUtf8(highlightBytes, fragmentLength);
+ builder.appendBytesRef(new BytesRef(truncatedBytes));
+ } else {
+ builder.appendBytesRef(new BytesRef(highlightBytes));
+ }
}
if (multivalued) {
builder.endPositionEntry();
@@ -140,6 +155,21 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
}
}
+ private static byte[] truncateUtf8(byte[] bytes, int maxLength) throws CharacterCodingException {
+ if (bytes.length <= maxLength) return bytes;
+
+ CharsetDecoder dec = StandardCharsets.UTF_8.newDecoder()
+ .onMalformedInput(CodingErrorAction.IGNORE)
+ .onUnmappableCharacter(CodingErrorAction.IGNORE);
+
+ CharBuffer chars = dec.decode(ByteBuffer.wrap(bytes, 0, maxLength));
+ ByteBuffer out = StandardCharsets.UTF_8.encode(chars);
+
+ byte[] result = new byte[out.remaining()];
+ out.get(result);
+ return result;
+ }
+
private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
return () -> {
StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet());
From ae92c8325061d9a2a520ca2cb96055af910bc2a7 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Wed, 20 Aug 2025 15:27:49 -0400
Subject: [PATCH 39/44] Fix most extractSnippets CSV tests, add some more test
cases
---
.../HighlighterExpressionEvaluator.java | 6 +-
.../extract-snippets-function.csv-spec | 84 ++++++++++++++-----
2 files changed, 66 insertions(+), 24 deletions(-)
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
index 65e9375458299..6a788d541463e 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -140,7 +140,8 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d
for (Text highlightText : highlight.fragments()) {
byte[] highlightBytes = highlightText.bytes().bytes();
if (highlightBytes.length > fragmentLength) {
- // TODO - This isn't a great solution, but in order to resolve character encoding issues in the
+ // TODO - Figure out a better way to construct BytesRef
+ // This isn't a great solution, but in order to resolve character encoding issues in the
// returned BytesRef we need to ensure that the fragment size we return is equal to what was requested.
// Since the highlighter's default sentence boundary scanner can return longer fragments, we're truncating for now.
byte[] truncatedBytes = truncateUtf8(highlightBytes, fragmentLength);
@@ -163,7 +164,8 @@ private static byte[] truncateUtf8(byte[] bytes, int maxLength) throws Character
.onUnmappableCharacter(CodingErrorAction.IGNORE);
CharBuffer chars = dec.decode(ByteBuffer.wrap(bytes, 0, maxLength));
- ByteBuffer out = StandardCharsets.UTF_8.encode(chars);
+ String trimmed = chars.toString().trim();
+ ByteBuffer out = StandardCharsets.UTF_8.encode(trimmed);
byte[] result = new byte[out.remaining()];
out.get(result);
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
index d512b1ce72a7e..d432b3c4da377 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
@@ -16,7 +16,7 @@ FROM books
// tag::extract-snippets-with-field-result[]
book_no:keyword | author:text | title:text | snippets:keyword
-1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps the
+1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th
// end::extract-snippets-with-field-result[]
;
@@ -25,18 +25,18 @@ required_capability: extract_snippets_function
FROM books
| WHERE MATCH(description, "hobbit")
-| EVAL snippets = extract_snippets(description, "hobbit", 1, 25)
+| EVAL snippets = extract_snippets(description, "hobbit", 1, 50)
| KEEP book_no, author, title, snippets
| SORT book_no
| LIMIT 5
;
book_no:keyword | author:text | title:text | snippets:keyword
-1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
-2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
-2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
-2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit
-2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | is accompanied by appropriate passage from The Hob
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | Tolkien, beloved author of THE HOBBIT.
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | This beautiful gift edition of The Hobbit, J.R.R.
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | them all - which has fallen into the hands of the
;
extractMultipleSnippetsWithMatch
@@ -51,11 +51,11 @@ FROM books
;
book_no:keyword | author:text | title:text | snippets:keyword
-1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
-2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
-2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of The Hobbit, Tolkien's own children, The Hobbit]
-2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Hobbit, , THE HOBBIT: AN UNEXPECTED, film adaptation of The Hobbit]
-2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of, Tolkien's own children, T]
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Ho, , THE HOBBIT: AN UNEXPECT, film adaptation of The Ho]
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hob
;
@@ -67,16 +67,20 @@ FROM books
| EVAL snippets = extract_snippets(description, "hobbit", 3, 25)
| MV_EXPAND snippets
| KEEP book_no, author, title, snippets
-| SORT book_no
-| LIMIT 5
+| SORT snippets
+| LIMIT 9
;
-book_no:keyword | author:text | title:text | snippets:keyword
-1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit
-2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT
-2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of The Hobbit
-2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, The Hobbit
-2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
+book_no:keyword | author:text | title:text | snippets:keyword
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | , THE HOBBIT: AN UNEXPECT
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
+6760 | J. R. R. Tolkien | Roverandom | By the author of The Hobb
+7350 | [Christopher Tolkien, John Ronald Reuel Tolkien] | Return of the Shadow | The character of the hobb
+4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's Hobbit poems in
+4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's acclaimed The H
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, T
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of
;
extractMultipleSnippetsWithSomeNoMatches
@@ -84,16 +88,52 @@ required_capability: extract_snippets_function
FROM books
| WHERE MATCH(author, "Faulkner")
-| EVAL snippets = extract_snippets(description, "slavery", 1, 10)
+| EVAL snippets = extract_snippets(description, "slavery", 1, 25)
| KEEP book_no, author, title, snippets
| SORT book_no
| LIMIT 5
;
book_no:keyword | author:text | title:text | snippets:keyword
-2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | abolition of slavery
+2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | , and the abolition of sl
2713 | William Faulkner | Collected Stories of William Faulkner | null
2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null
2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null
3293 | Danny Faulkner | Universe by Design | null
;
+
+extractSnippetsWithDefaultNumSnippetsAndLength
+required_capability: extract_snippets_function
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit")
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | of The Hob
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | in The Hob
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob
+;
+
+extractSnippetsWithDefaultLength
+required_capability: extract_snippets_function
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [of The Hob, Baggins is, children,]
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [in The Hob, , THE HOBB, of The Hob]
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob
+;
From 48c2825ad26e9dc93604c81b3e67ab5d388eb038 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 21 Aug 2025 10:14:57 -0400
Subject: [PATCH 40/44] Remove changes to AnalyzerTests
---
.../xpack/esql/analysis/AnalyzerTests.java | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
index 72d08927e013e..ad2225d887942 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
@@ -148,7 +148,7 @@
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.startsWith;
-@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug")
+//@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug")
public class AnalyzerTests extends ESTestCase {
private static final UnresolvedRelation UNRESOLVED_RELATION = new UnresolvedRelation(
@@ -2946,16 +2946,6 @@ public void testFromEnrichAndMatchColonUsage() {
assertEquals(esRelation.indexPattern(), "test");
}
- public void testSnippets() {
- LogicalPlan plan = analyze("""
- from test
- | EVAL x = extract_snippets(first_name, "text", 1, 10)
- | KEEP x
- """);
- var limit = as(plan, Limit.class);
- var filter = as(limit.child(), Filter.class);
- }
-
public void testFunctionNamedParamsAsFunctionArgument() {
LogicalPlan plan = analyze("""
from test
From 80d105675f7ae8d1933a824b0934f88c932465fc Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 21 Aug 2025 10:16:22 -0400
Subject: [PATCH 41/44] Spotless
---
.../org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 1 -
1 file changed, 1 deletion(-)
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
index ad2225d887942..d72a97647c110 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
@@ -18,7 +18,6 @@
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.test.ESTestCase;
-import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.xpack.esql.EsqlTestUtils;
import org.elasticsearch.xpack.esql.LoadMapping;
import org.elasticsearch.xpack.esql.VerificationException;
From ec3ac7ae095d1fc08bae7261eb95b7bb2fdc792e Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 21 Aug 2025 10:25:19 -0400
Subject: [PATCH 42/44] Add preview = true
---
.../esql/expression/function/scalar/string/ExtractSnippets.java | 1 +
1 file changed, 1 insertion(+)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index 159070ddf11c1..c3f924d8a2c4e 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -82,6 +82,7 @@ public class ExtractSnippets extends EsqlScalarFunction
@FunctionInfo(
returnType = "keyword",
+ preview = true,
description = """
Extracts the most relevant snippets to return from a given input string""",
examples = @Example(file = "keyword", tag = "extract_snippets")
From 694bf6a043aa2fb31e066d2508e0779853bc59b1 Mon Sep 17 00:00:00 2001
From: Kathleen DeRusso
Date: Thu, 21 Aug 2025 14:00:16 -0400
Subject: [PATCH 43/44] Add ExtractSnippetsTests and associated generated
documentation
---
.../functions/description/extract_snippets.md | 6 ++
.../functions/examples/extract_snippets.md | 18 ++++
.../functions/layout/extract_snippets.md | 23 +++++
.../functions/parameters/extract_snippets.md | 16 ++++
.../functions/types/extract_snippets.md | 9 ++
.../images/functions/extract_snippets.svg | 1 +
.../functions/extract_snippets.json | 49 +++++++++++
.../kibana/docs/functions/extract_snippets.md | 9 ++
.../scalar/string/ExtractSnippets.java | 10 ++-
.../xpack/esql/SerializationTestUtils.java | 2 +
.../scalar/string/ExtractSnippetsTests.java | 85 +++++++++++++++++++
11 files changed, 224 insertions(+), 4 deletions(-)
create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md
create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md
create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md
create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md
create mode 100644 docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md
create mode 100644 docs/reference/query-languages/esql/images/functions/extract_snippets.svg
create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json
create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md
create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java
diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md
new file mode 100644
index 0000000000000..d2368798306f1
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md
@@ -0,0 +1,6 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Description**
+
+Extracts the most relevant snippets to return from a given input string.
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md
new file mode 100644
index 0000000000000..741e7e43a74b4
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md
@@ -0,0 +1,18 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Example**
+
+```{applies_to}
+stack: preview 9.2.0
+```
+
+```esql
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+```
+
+| book_no:keyword | author:text | title:text | snippets:keyword |
+| --- | --- | --- | --- |
+| 1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th |
+
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md
new file mode 100644
index 0000000000000..69d7ee3b59f1b
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md
@@ -0,0 +1,23 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+## `EXTRACT_SNIPPETS` [esql-extract_snippets]
+
+**Syntax**
+
+:::{image} ../../../images/functions/extract_snippets.svg
+:alt: Embedded
+:class: text-center
+:::
+
+
+:::{include} ../parameters/extract_snippets.md
+:::
+
+:::{include} ../description/extract_snippets.md
+:::
+
+:::{include} ../types/extract_snippets.md
+:::
+
+:::{include} ../examples/extract_snippets.md
+:::
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md
new file mode 100644
index 0000000000000..8c5cea74e8512
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md
@@ -0,0 +1,16 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Parameters**
+
+`field`
+: The input string
+
+`str`
+: The input string
+
+`num_snippets`
+: The number of snippets to return. Defaults to 1
+
+`snippet_length`
+: The length of snippets to return. Defaults to 10
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md
new file mode 100644
index 0000000000000..2072f7d99abad
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md
@@ -0,0 +1,9 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Supported types**
+
+| field | str | num_snippets | snippet_length | result |
+| --- | --- | --- | --- | --- |
+| keyword | keyword | | | keyword |
+| text | keyword | | | keyword |
+
diff --git a/docs/reference/query-languages/esql/images/functions/extract_snippets.svg b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg
new file mode 100644
index 0000000000000..c17eff787d563
--- /dev/null
+++ b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json
new file mode 100644
index 0000000000000..e1c0b90fb237b
--- /dev/null
+++ b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json
@@ -0,0 +1,49 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.",
+ "type" : "scalar",
+ "name" : "extract_snippets",
+ "description" : "Extracts the most relevant snippets to return from a given input string.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The input string"
+ },
+ {
+ "name" : "str",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The input string"
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "keyword"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "text",
+ "optional" : false,
+ "description" : "The input string"
+ },
+ {
+ "name" : "str",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The input string"
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "keyword"
+ }
+ ],
+ "examples" : [
+ "FROM books\n| EVAL snippets = extract_snippets(description, \"crowning achievement\", 1, 25)"
+ ],
+ "preview" : true,
+ "snapshot_only" : true
+}
diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md
new file mode 100644
index 0000000000000..b7865446d397f
--- /dev/null
+++ b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md
@@ -0,0 +1,9 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+### EXTRACT SNIPPETS
+Extracts the most relevant snippets to return from a given input string.
+
+```esql
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+```
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
index c3f924d8a2c4e..6eb531356de4b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java
@@ -84,13 +84,14 @@ public class ExtractSnippets extends EsqlScalarFunction
returnType = "keyword",
preview = true,
description = """
- Extracts the most relevant snippets to return from a given input string""",
- examples = @Example(file = "keyword", tag = "extract_snippets")
+ Extracts the most relevant snippets to return from a given input string.""",
+ examples = {
+ @Example(file = "extract-snippets-function", tag = "extract-snippets-with-field", applies_to = "stack: preview 9.2.0") }
)
public ExtractSnippets(
Source source,
- @Param(name = "field", type = { "keyword" }, description = "The input string") Expression field,
- @Param(name = "str", type = { "keyword", "text" }, description = "The input string") Expression str,
+ @Param(name = "field", type = { "keyword", "text" }, description = "The input string") Expression field,
+ @Param(name = "str", type = { "keyword" }, description = "The input string") Expression str,
@Param(
optional = true,
name = "num_snippets",
@@ -105,6 +106,7 @@ public ExtractSnippets(
) Expression snippetLength
) {
this(source, field, str, numSnippets, snippetLength, new MatchQueryBuilder(field.sourceText(), str.sourceText()));
+
}
public ExtractSnippets(
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
index e55a1b039258e..c87cc11306b13 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
@@ -18,6 +18,7 @@
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder;
+import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.RegexpQueryBuilder;
@@ -113,6 +114,7 @@ public static NamedWriteableRegistry writableRegistry() {
entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, RegexpQueryBuilder.NAME, RegexpQueryBuilder::new));
entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, ExistsQueryBuilder.NAME, ExistsQueryBuilder::new));
entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, KnnVectorQueryBuilder.NAME, KnnVectorQueryBuilder::new));
+ entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, MatchQueryBuilder.NAME, MatchQueryBuilder::new));
entries.add(SingleValueQuery.ENTRY);
entries.addAll(ExpressionWritables.getNamedWriteables());
entries.addAll(PlanWritables.getNamedWriteables());
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java
new file mode 100644
index 0000000000000..da5e85a0dff98
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase;
+import org.elasticsearch.xpack.esql.expression.function.FunctionName;
+import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.stringCases;
+import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER;
+import static org.hamcrest.Matchers.equalTo;
+
+@FunctionName("extract_snippets")
+public class ExtractSnippetsTests extends AbstractFunctionTestCase {
+
+ public ExtractSnippetsTests(@Name("TestCase") Supplier testCaseSupplier) {
+ this.testCase = testCaseSupplier.get();
+ }
+
+ @ParametersFactory
+ public static Iterable