diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md
new file mode 100644
index 0000000000000..d2368798306f1
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md
@@ -0,0 +1,6 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Description**
+
+Extracts the most relevant snippets to return from a given input string.
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md
new file mode 100644
index 0000000000000..741e7e43a74b4
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md
@@ -0,0 +1,18 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Example**
+
+```{applies_to}
+stack: preview 9.2.0
+```
+
+```esql
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+```
+
+| book_no:keyword | author:text | title:text | snippets:keyword |
+| --- | --- | --- | --- |
+| 1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th |
+
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md
new file mode 100644
index 0000000000000..69d7ee3b59f1b
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md
@@ -0,0 +1,23 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+## `EXTRACT_SNIPPETS` [esql-extract_snippets]
+
+**Syntax**
+
+:::{image} ../../../images/functions/extract_snippets.svg
+:alt: Embedded
+:class: text-center
+:::
+
+
+:::{include} ../parameters/extract_snippets.md
+:::
+
+:::{include} ../description/extract_snippets.md
+:::
+
+:::{include} ../types/extract_snippets.md
+:::
+
+:::{include} ../examples/extract_snippets.md
+:::
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md
new file mode 100644
index 0000000000000..8c5cea74e8512
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md
@@ -0,0 +1,16 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Parameters**
+
+`field`
+: The input string
+
+`str`
+: The search string used to select the most relevant snippets from `field`
+
+`num_snippets`
+: The number of snippets to return. Defaults to 1
+
+`snippet_length`
+: The length of snippets to return. Defaults to 10
+
diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md
new file mode 100644
index 0000000000000..2072f7d99abad
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md
@@ -0,0 +1,9 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+**Supported types**
+
+| field | str | num_snippets | snippet_length | result |
+| --- | --- | --- | --- | --- |
+| keyword | keyword | | | keyword |
+| text | keyword | | | keyword |
+
diff --git a/docs/reference/query-languages/esql/images/functions/extract_snippets.svg b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg
new file mode 100644
index 0000000000000..c17eff787d563
--- /dev/null
+++ b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json
new file mode 100644
index 0000000000000..e1c0b90fb237b
--- /dev/null
+++ b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json
@@ -0,0 +1,49 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.",
+ "type" : "scalar",
+ "name" : "extract_snippets",
+ "description" : "Extracts the most relevant snippets to return from a given input string.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The input string"
+ },
+ {
+ "name" : "str",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The search string used to select the most relevant snippets from `field`"
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "keyword"
+ },
+ {
+ "params" : [
+ {
+ "name" : "field",
+ "type" : "text",
+ "optional" : false,
+ "description" : "The input string"
+ },
+ {
+ "name" : "str",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "The search string used to select the most relevant snippets from `field`"
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "keyword"
+ }
+ ],
+ "examples" : [
+ "FROM books\n| EVAL snippets = extract_snippets(description, \"crowning achievement\", 1, 25)"
+ ],
+ "preview" : true,
+ "snapshot_only" : true
+}
diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md
new file mode 100644
index 0000000000000..b7865446d397f
--- /dev/null
+++ b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md
@@ -0,0 +1,9 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+### EXTRACT SNIPPETS
+Extracts the most relevant snippets to return from a given input string.
+
+```esql
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+```
diff --git a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java
index b16c00033292b..3cc3f8023cd6e 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchHit.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java
@@ -123,7 +123,7 @@ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity) {
this(nestedTopDocId, id, nestedIdentity, null);
}
- private SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
+ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
this(
nestedTopDocId,
DEFAULT_SCORE,
diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
index f3aee46398432..6c47d6f995097 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchModule.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -280,6 +280,8 @@
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
*/
public class SearchModule {
+ private static volatile Map staticHighlighters = Map.of();
+
public static final Setting INDICES_MAX_CLAUSE_COUNT_SETTING = Setting.intSetting(
"indices.query.bool.max_clause_count",
4096,
@@ -923,6 +925,10 @@ private static Map setupHighlighters(Settings settings, Lis
return unmodifiableMap(highlighters.getRegistry());
}
+    /**
+     * Returns the highlighters captured the last time a {@code SearchModule} registered its fetch sub-phases.
+     * The map is empty until that has happened. The value is held in a static field, so it is shared by all
+     * {@code SearchModule} instances rather than belonging to one of them.
+     */
+    public static Map<String, Highlighter> getStaticHighlighters() {
+        return staticHighlighters;
+    }
+
private void registerScoreFunctions(List plugins) {
// ScriptScoreFunctionBuilder has it own named writable because of a new script_score query
namedWriteables.add(
@@ -1062,6 +1068,9 @@ private void registerFetchSubPhases(List plugins) {
registerFetchSubPhase(new HighlightPhase(highlighters));
registerFetchSubPhase(new FetchScorePhase());
+ // Store highlighters in a static map for other plugins to access
+ staticHighlighters = Map.copyOf(highlighters);
+
FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters);
registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase);
}
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
index 3efbcd15140e5..9ae3a1349510e 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
@@ -114,7 +114,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings();
- Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html")
+ Encoder encoder = "html".equals(fieldContext.field.fieldOptions().encoder())
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
new file mode 100644
index 0000000000000..bb7cf4ba0e675
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.search.Query;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.SearchExecutionContext;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Static helpers that build highlighting configuration for the purpose of extracting snippets.
+ * <p>
+ * The produced options use empty pre and post tags and request score-ordered fragments.
+ */
+public final class HighlightSnippetUtils {
+
+    private HighlightSnippetUtils() {
+        // utility class, not meant to be instantiated
+    }
+
+    /**
+     * Builds a {@link SearchHighlightContext} holding a single snippet-extraction field.
+     *
+     * @param searchExecutionContext context used to convert {@code queryBuilder} into a Lucene query
+     * @param field name of the field to extract snippets from
+     * @param numSnippets number of fragments to request
+     * @param snippetCharLength fragment size to request, also used as the no-match size
+     * @param queryBuilder query that is converted and used as the highlight query
+     * @return a highlight context containing exactly one field
+     * @throws IOException if converting the query builder into a Lucene query fails
+     */
+    public static SearchHighlightContext buildSearchHighlightContextForSnippets(
+        SearchExecutionContext searchExecutionContext,
+        String field,
+        int numSnippets,
+        int snippetCharLength,
+        QueryBuilder queryBuilder
+    ) throws IOException {
+        Query query = queryBuilder.toQuery(searchExecutionContext);
+        SearchHighlightContext.Field highlightField = buildFieldHighlightContextForSnippets(
+            searchExecutionContext,
+            field,
+            numSnippets,
+            snippetCharLength,
+            query
+        );
+        return new SearchHighlightContext(List.of(highlightField));
+    }
+
+    /**
+     * Builds the per-field highlight options used for snippet extraction: score-ordered fragments,
+     * empty pre/post tags and no field-match requirement.
+     *
+     * @param searchExecutionContext currently not read by this method
+     * @param fieldName name of the field to extract snippets from
+     * @param numSnippets number of fragments to request
+     * @param snippetCharLength fragment size to request, also used as the no-match size
+     * @param query Lucene query used as the highlight query
+     * @return the highlight field definition for {@code fieldName}
+     */
+    public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets(
+        SearchExecutionContext searchExecutionContext,
+        String fieldName,
+        int numSnippets,
+        int snippetCharLength,
+        Query query
+    ) {
+        SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
+        optionsBuilder.numberOfFragments(numSnippets);
+        optionsBuilder.fragmentCharSize(snippetCharLength);
+        optionsBuilder.noMatchSize(snippetCharLength);
+        // Empty tags: matched terms are not wrapped in any markup
+        optionsBuilder.preTags(new String[] { "" });
+        optionsBuilder.postTags(new String[] { "" });
+        optionsBuilder.requireFieldMatch(false);
+        optionsBuilder.scoreOrdered(true);
+        optionsBuilder.highlightQuery(query);
+        return new SearchHighlightContext.Field(fieldName, optionsBuilder.build());
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
index a85ae92c24bcf..111805be5b905 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java
@@ -185,16 +185,16 @@ public Map options() {
return options;
}
- static class Builder {
+ public static class Builder {
private final FieldOptions fieldOptions = new FieldOptions();
- Builder fragmentCharSize(int fragmentCharSize) {
+ public Builder fragmentCharSize(int fragmentCharSize) {
fieldOptions.fragmentCharSize = fragmentCharSize;
return this;
}
- Builder numberOfFragments(int numberOfFragments) {
+ public Builder numberOfFragments(int numberOfFragments) {
fieldOptions.numberOfFragments = numberOfFragments;
return this;
}
@@ -209,17 +209,17 @@ Builder encoder(String encoder) {
return this;
}
- Builder preTags(String[] preTags) {
+ public Builder preTags(String[] preTags) {
fieldOptions.preTags = preTags;
return this;
}
- Builder postTags(String[] postTags) {
+ public Builder postTags(String[] postTags) {
fieldOptions.postTags = postTags;
return this;
}
- Builder scoreOrdered(boolean scoreOrdered) {
+ public Builder scoreOrdered(boolean scoreOrdered) {
fieldOptions.scoreOrdered = scoreOrdered;
return this;
}
@@ -229,7 +229,7 @@ Builder highlightFilter(boolean highlightFilter) {
return this;
}
- Builder requireFieldMatch(boolean requireFieldMatch) {
+ public Builder requireFieldMatch(boolean requireFieldMatch) {
fieldOptions.requireFieldMatch = requireFieldMatch;
return this;
}
@@ -269,7 +269,7 @@ Builder boundaryScannerLocale(Locale boundaryScannerLocale) {
return this;
}
- Builder highlightQuery(Query highlightQuery) {
+ public Builder highlightQuery(Query highlightQuery) {
fieldOptions.highlightQuery = highlightQuery;
return this;
}
@@ -294,7 +294,7 @@ Builder options(Map options) {
return this;
}
- FieldOptions build() {
+ public FieldOptions build() {
return fieldOptions;
}
diff --git a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
index 7d018a7ef4ba9..cb3ddb7deb5cc 100644
--- a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
+++ b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java
@@ -28,6 +28,7 @@
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.search.RescoreDocIds;
import org.elasticsearch.search.SearchExtBuilder;
+import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.aggregations.SearchContextAggregations;
import org.elasticsearch.search.collapse.CollapseContext;
@@ -40,6 +41,7 @@
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.lookup.SourceFilter;
import org.elasticsearch.search.profile.Profilers;
@@ -152,6 +154,10 @@ public final boolean isClosed() {
public abstract void highlight(SearchHighlightContext highlight);
+    /**
+     * Returns the highlighters exposed by {@link SearchModule#getStaticHighlighters()}.
+     */
+    public Map<String, Highlighter> highlighters() {
+        return SearchModule.getStaticHighlighters();
+    }
+
public InnerHitsContext innerHits() {
if (innerHitsContext == null) {
innerHitsContext = new InnerHitsContext();
diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java
index f21ed72d7eb21..5504e48d74636 100644
--- a/x-pack/plugin/esql/compute/src/main/java/module-info.java
+++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java
@@ -21,6 +21,7 @@
requires org.elasticsearch.geo;
requires org.elasticsearch.xcore;
requires hppc;
+ requires org.apache.lucene.highlighter;
exports org.elasticsearch.compute;
exports org.elasticsearch.compute.aggregation;
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
new file mode 100644
index 0000000000000..6a788d541463e
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.lucene;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
+import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.SourceLoader;
+import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.fetch.FetchContext;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils;
+import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
+import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
+import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.search.lookup.Source;
+import org.elasticsearch.xcontent.Text;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.core.RefCounted.ALWAYS_REFERENCED;
+
+/**
+ * Expression evaluator that runs a Lucene query against the documents of a page and, for every matching
+ * document, emits the highlighted fragments of one field as snippets. Documents that do not match, or for
+ * which no fragment is produced, yield {@code null}.
+ */
+public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator<BytesRefBlock.Builder>
+    implements
+        EvalOperator.ExpressionEvaluator {
+
+    private final String fieldName;
+    private final int numFragments;
+    private final int fragmentLength;
+    private final FetchContext fetchContext;
+    private final MappedFieldType fieldType;
+    /** Highlighter resolved once for the field; {@code null} when no field type was resolved for it. */
+    private final Highlighter highlighter;
+
+    HighlighterExpressionEvaluator(
+        BlockFactory blockFactory,
+        ShardConfig[] shardConfigs,
+        String fieldName,
+        Integer numFragments,
+        Integer fragmentLength,
+        SearchContext searchContext,
+        Map<String, Highlighter> highlighters
+    ) {
+        super(blockFactory, shardConfigs);
+        this.fieldName = fieldName;
+        this.numFragments = numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS;
+        this.fragmentLength = fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE;
+
+        // Create a source loader for highlighter use
+        SourceLoader sourceLoader = searchContext.newSourceLoader(null);
+        fetchContext = new FetchContext(searchContext, sourceLoader);
+        SearchExecutionContext searchExecutionContext = searchContext.getSearchExecutionContext();
+        if (searchExecutionContext == null) {
+            throw new IllegalStateException("SearchExecutionContext not found");
+        }
+        fieldType = searchExecutionContext.getFieldType(fieldName);
+        // Resolve the highlighter once instead of on every matching document
+        highlighter = fieldType == null
+            ? null
+            : highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter());
+    }
+
+    @Override
+    protected ScoreMode scoreMode() {
+        return ScoreMode.COMPLETE;
+    }
+
+    @Override
+    protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
+        return blockFactory.newConstantNullBlock(size);
+    }
+
+    @Override
+    protected BytesRefBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+        return blockFactory.newBytesRefBlockBuilder(size * numFragments);
+    }
+
+    /**
+     * Appends the snippets of one matching document as a single position: {@code null} when there is
+     * nothing to highlight, a single value for one fragment, a multivalue otherwise.
+     */
+    @Override
+    protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+        throws IOException {
+        HighlightField highlight = highlighter == null ? null : highlight(docId, leafReaderContext, query);
+        Text[] fragments = highlight == null ? null : highlight.fragments();
+        if (fragments == null || fragments.length == 0) {
+            // Every document must contribute exactly one position, otherwise the block goes out of sync with the page
+            builder.appendNull();
+            return;
+        }
+
+        boolean multivalued = fragments.length > 1;
+        if (multivalued) {
+            builder.beginPositionEntry();
+        }
+        for (Text fragment : fragments) {
+            builder.appendBytesRef(toBytesRef(fragment));
+        }
+        if (multivalued) {
+            builder.endPositionEntry();
+        }
+    }
+
+    /** Runs the resolved highlighter for one document of the given leaf; callers check {@link #highlighter} first. */
+    private HighlightField highlight(int docId, LeafReaderContext leafReaderContext, Query query) throws IOException {
+        // TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes?
+        SearchHit searchHit = new SearchHit(docId, null, null, ALWAYS_REFERENCED);
+        Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
+        SearchHighlightContext.Field field = HighlightSnippetUtils.buildFieldHighlightContextForSnippets(
+            fetchContext.getSearchExecutionContext(),
+            fieldName,
+            numFragments,
+            fragmentLength,
+            query
+        );
+        FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null);
+        FieldHighlightContext highlightContext = new FieldHighlightContext(
+            fieldName,
+            field,
+            fieldType,
+            fetchContext,
+            hitContext,
+            query,
+            new HashMap<>()
+        );
+        return highlighter.highlight(highlightContext);
+    }
+
+    /**
+     * Converts a fragment into a {@link BytesRef}. Fragments whose UTF-8 form is longer than
+     * {@link #fragmentLength} bytes are cut down to that many bytes and trimmed.
+     */
+    private BytesRef toBytesRef(Text fragment) throws CharacterCodingException {
+        var utf8 = fragment.bytes();
+        if (utf8.length() <= fragmentLength) {
+            // Honour offset and length: the backing array is not guaranteed to hold only this fragment
+            return new BytesRef(utf8.bytes(), utf8.offset(), utf8.length());
+        }
+        // TODO - Figure out a better way to construct BytesRef
+        // The highlighter's default sentence boundary scanner can return fragments longer than requested,
+        // so for now they are truncated to the requested size.
+        return new BytesRef(truncateUtf8(utf8.bytes(), utf8.offset(), fragmentLength));
+    }
+
+    /**
+     * Decodes {@code maxLength} bytes starting at {@code offset} as UTF-8, ignoring malformed input such as a
+     * multi-byte character cut in half at the end, trims the text and re-encodes it as UTF-8.
+     */
+    private static byte[] truncateUtf8(byte[] bytes, int offset, int maxLength) throws CharacterCodingException {
+        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
+            .onMalformedInput(CodingErrorAction.IGNORE)
+            .onUnmappableCharacter(CodingErrorAction.IGNORE);
+        CharBuffer chars = decoder.decode(ByteBuffer.wrap(bytes, offset, maxLength));
+        return chars.toString().trim().getBytes(StandardCharsets.UTF_8);
+    }
+
+    private static Supplier<Source> lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
+        return () -> {
+            StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet());
+            try {
+                LeafStoredFieldLoader leafRootLoader = rootLoader.getLoader(ctx, null);
+                leafRootLoader.advanceTo(doc);
+                return Source.fromBytes(leafRootLoader.source());
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        };
+    }
+
+    @Override
+    protected void appendNoMatch(BytesRefBlock.Builder builder) {
+        builder.appendNull();
+    }
+
+    @Override
+    public Block eval(Page page) {
+        return executeQuery(page);
+    }
+
+    public record Factory(
+        ShardConfig[] shardConfigs,
+        String fieldName,
+        Integer numFragments,
+        Integer fragmentSize,
+        SearchContext searchContext,
+        Map<String, Highlighter> highlighters
+    ) implements EvalOperator.ExpressionEvaluator.Factory {
+        @Override
+        public EvalOperator.ExpressionEvaluator get(DriverContext context) {
+            return new HighlighterExpressionEvaluator(
+                context.blockFactory(),
+                shardConfigs,
+                fieldName,
+                numFragments,
+                fragmentSize,
+                searchContext,
+                highlighters
+            );
+        }
+    }
+}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
index c7f187c6c4a8f..ad05d27b8f42d 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java
@@ -17,7 +17,6 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
-import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.DocBlock;
@@ -267,7 +266,7 @@ Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOE
scoreBuilder,
ctx,
LuceneQueryEvaluator.this::appendNoMatch,
- LuceneQueryEvaluator.this::appendMatch,
+                    LuceneQueryEvaluator.this::appendMatch,
weight.getQuery()
)
) {
@@ -310,12 +309,12 @@ private void initScorer(int minDocId) throws IOException {
private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
if (scorer.iterator().docID() == doc) {
- appendMatch(builder, scorer);
+ appendMatch(builder, scorer, doc, ctx, weight.getQuery());
} else if (scorer.iterator().docID() > doc) {
appendNoMatch(builder);
} else {
if (scorer.iterator().advance(doc) == doc) {
- appendMatch(builder, scorer);
+ appendMatch(builder, scorer, doc, ctx, weight.getQuery());
} else {
appendNoMatch(builder);
}
@@ -323,6 +322,11 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
}
}
+    /**
+     * Callback invoked for a matching document to append its result to a builder.
+     *
+     * @param <T> type of the first argument, the builder that receives the result
+     * @param <U> type of the second argument; the collector in this file passes its scorer
+     * @param <E> type of exception the callback may throw
+     */
+    @FunctionalInterface
+    public interface MatchAppender<T, U, E extends Exception> {
+        void accept(T t, U u, int docId, LeafReaderContext leafReaderContext, Query query) throws E;
+    }
+
/**
* Collects matching information for dense range of doc ids. This assumes that
* doc ids are sent to {@link LeafCollector#collect(int)} in ascending order
@@ -333,7 +337,7 @@ static class DenseCollector implements LeafCollector, R
private final int max;
private final LeafReaderContext leafReaderContext;
private final Consumer appendNoMatch;
- private final CheckedBiConsumer appendMatch;
+ private final MatchAppender appendMatch;
private final Query query;
private Scorable scorer;
@@ -345,7 +349,7 @@ static class DenseCollector implements LeafCollector, R
U scoreBuilder,
LeafReaderContext leafReaderContext,
Consumer appendNoMatch,
- CheckedBiConsumer appendMatch,
+ MatchAppender appendMatch,
Query query
) {
this.scoreBuilder = scoreBuilder;
@@ -367,7 +371,7 @@ public void collect(int doc) throws IOException {
while (next++ < doc) {
appendNoMatch.accept(scoreBuilder);
}
- appendMatch.accept(scoreBuilder, scorer);
+ appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query);
}
public Block build() {
@@ -405,7 +409,8 @@ public void close() {
/**
* Appends a matching result to a builder created by @link createVectorBuilder}
*/
- protected abstract void appendMatch(T builder, Scorable scorer) throws IOException;
+ protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException;
/**
* Appends a non matching result to a builder created by @link createVectorBuilder}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
index 814ecaa577238..c249620060685 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java
@@ -7,6 +7,7 @@
package org.elasticsearch.compute.lucene;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
@@ -58,7 +59,8 @@ protected void appendNoMatch(BooleanBlock.Builder builder) {
}
@Override
- protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer) throws IOException {
+ protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException {
builder.appendBoolean(true);
}
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
index 9c6db6b0bdc63..88b5721a6fdf9 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java
@@ -7,6 +7,7 @@
package org.elasticsearch.compute.lucene;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
@@ -60,7 +61,8 @@ protected void appendNoMatch(DoubleBlock.Builder builder) {
}
@Override
- protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer) throws IOException {
+ protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
+ throws IOException {
builder.appendDouble(scorer.score());
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
index 6042a3c8cca5f..616679669b46f 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java
@@ -29,7 +29,7 @@ protected DenseCollector createDenseCollector(int min, int
blockFactory().newBooleanBlockBuilder(max - min + 1),
null,
b -> b.appendBoolean(false),
- (b, s) -> b.appendBoolean(true),
+ (b, s, d, lr, q) -> b.appendBoolean(true),
null
);
}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
index ba075ac98feb8..af162db91978f 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java
@@ -33,7 +33,7 @@ protected LuceneQueryEvaluator.DenseCollector createDenseCo
blockFactory().newDoubleBlockBuilder(max - min + 1),
null,
b -> b.appendDouble(NO_MATCH_SCORE),
- (b, s) -> b.appendDouble(s.score()),
+ (b, s, d, lr, q) -> b.appendDouble(s.score()),
null
);
}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
new file mode 100644
index 0000000000000..d432b3c4da377
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec
@@ -0,0 +1,139 @@
+###############################################
+# Tests for ExtractSnippets function
+#
+
+extractSnippetsWithField
+required_capability: extract_snippets_function
+
+// tag::extract-snippets-with-field[]
+FROM books
+| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25)
+// end::extract-snippets-with-field[]
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 1
+;
+
+// tag::extract-snippets-with-field-result[]
+book_no:keyword | author:text | title:text | snippets:keyword
+1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th
+// end::extract-snippets-with-field-result[]
+;
+
+extractSnippetsWithMatch
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 1, 50)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | is accompanied by appropriate passage from The Hob
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | Tolkien, beloved author of THE HOBBIT.
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | This beautiful gift edition of The Hobbit, J.R.R.
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | them all - which has fallen into the hands of the
+;
+
+extractMultipleSnippetsWithMatch
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3, 25)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of, Tolkien's own children, T]
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Ho, , THE HOBBIT: AN UNEXPECT, film adaptation of The Ho]
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hob
+;
+
+
+extractMultipleSnippetsWithMatchMvExpand
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3, 25)
+| MV_EXPAND snippets
+| KEEP book_no, author, title, snippets
+| SORT snippets
+| LIMIT 9
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | , THE HOBBIT: AN UNEXPECT
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit
+6760 | J. R. R. Tolkien | Roverandom | By the author of The Hobb
+7350 | [Christopher Tolkien, John Ronald Reuel Tolkien] | Return of the Shadow | The character of the hobb
+4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's Hobbit poems in
+4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's acclaimed The H
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, T
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of
+;
+
+extractMultipleSnippetsWithSomeNoMatches
+required_capability: extract_snippets_function
+
+FROM books
+| WHERE MATCH(author, "Faulkner")
+| EVAL snippets = extract_snippets(description, "slavery", 1, 25)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | , and the abolition of sl
+2713 | William Faulkner | Collected Stories of William Faulkner | null
+2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null
+2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null
+3293 | Danny Faulkner | Universe by Design | null
+;
+
+extractSnippetsWithDefaultNumSnippetsAndLength
+required_capability: extract_snippets_function
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit")
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | of The Hob
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | in The Hob
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob
+;
+
+extractSnippetsWithDefaultLength
+required_capability: extract_snippets_function
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit", 3)
+| KEEP book_no, author, title, snippets
+| SORT book_no
+| LIMIT 5
+;
+
+book_no:keyword | author:text | title:text | snippets:keyword
+1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H
+2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB
+2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [of The Hob, Baggins is, children,]
+2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [in The Hob, , THE HOBB, of The Hob]
+2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob
+;
diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java
new file mode 100644
index 0000000000000..d4a99d18d63ef
--- /dev/null
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.plugin;
+
+import org.elasticsearch.action.index.IndexRequest;
+import org.elasticsearch.action.support.WriteRequest;
+import org.elasticsearch.client.internal.IndicesAdminClient;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase;
+import org.junit.Before;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+
+//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug")
+public class ExtractSnippetsFunctionIT extends AbstractEsqlIntegTestCase {
+
+ private static final List