diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md new file mode 100644 index 0000000000000..d2368798306f1 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Extracts the most relevant snippets to return from a given input string. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md new file mode 100644 index 0000000000000..741e7e43a74b4 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md @@ -0,0 +1,18 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```{applies_to} +stack: preview 9.2.0 +``` + +```esql +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +``` + +| book_no:keyword | author:text | title:text | snippets:keyword | +| --- | --- | --- | --- | +| 1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md new file mode 100644 index 0000000000000..69d7ee3b59f1b --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `EXTRACT_SNIPPETS` [esql-extract_snippets] + +**Syntax** + +:::{image} ../../../images/functions/extract_snippets.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/extract_snippets.md +::: + +:::{include} ../description/extract_snippets.md +::: + +:::{include} ../types/extract_snippets.md +::: + +:::{include} ../examples/extract_snippets.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md new file mode 100644 index 0000000000000..8c5cea74e8512 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md @@ -0,0 +1,16 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`field` +: The input string + +`str` +: The input string + +`num_snippets` +: The number of snippets to return. Defaults to 1 + +`snippet_length` +: The length of snippets to return. Defaults to 10 + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md new file mode 100644 index 0000000000000..2072f7d99abad --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Supported types** + +| field | str | num_snippets | snippet_length | result | +| --- | --- | --- | --- | --- | +| keyword | keyword | | | keyword | +| text | keyword | | | keyword | + diff --git a/docs/reference/query-languages/esql/images/functions/extract_snippets.svg b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg new file mode 100644 index 0000000000000..c17eff787d563 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg @@ -0,0 +1 @@ +EXTRACT_SNIPPETS(field,str,num_snippets,snippet_length) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json new file mode 100644 index 0000000000000..e1c0b90fb237b --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json @@ -0,0 +1,49 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "extract_snippets", + "description" : "Extracts the most relevant snippets to return from a given input string.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + }, + { + "name" : "str", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "The input string" + }, + { + "name" : "str", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "examples" : [ + "FROM books\n| EVAL snippets = extract_snippets(description, \"crowning achievement\", 1, 25)" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md new file mode 100644 index 0000000000000..b7865446d397f --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### EXTRACT SNIPPETS +Extracts the most relevant snippets to return from a given input string. 
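+
+The `num_snippets` and `snippet_length` arguments are optional and default to 1 and 10 respectively. A minimal two-argument form, mirroring the `extractSnippetsWithDefaultNumSnippetsAndLength` csv-spec test added in this change:
+
+```esql
+FROM books
+| WHERE MATCH(description, "hobbit")
+| EVAL snippets = extract_snippets(description, "hobbit")
+```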
+ +```esql +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +``` diff --git a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java index b16c00033292b..3cc3f8023cd6e 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchHit.java +++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java @@ -123,7 +123,7 @@ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity) { this(nestedTopDocId, id, nestedIdentity, null); } - private SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) { + public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) { this( nestedTopDocId, DEFAULT_SCORE, diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index f3aee46398432..6c47d6f995097 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -280,6 +280,8 @@ * Sets up things that can be done at search time like queries, aggregations, and suggesters. */ public class SearchModule { + private static volatile Map staticHighlighters = Map.of(); + public static final Setting INDICES_MAX_CLAUSE_COUNT_SETTING = Setting.intSetting( "indices.query.bool.max_clause_count", 4096, @@ -923,6 +925,10 @@ private static Map setupHighlighters(Settings settings, Lis return unmodifiableMap(highlighters.getRegistry()); } + public static Map getStaticHighlighters() { + return staticHighlighters; + } + private void registerScoreFunctions(List plugins) { // ScriptScoreFunctionBuilder has it own named writable because of a new script_score query namedWriteables.add( @@ -1062,6 +1068,9 @@ private void registerFetchSubPhases(List plugins) { registerFetchSubPhase(new HighlightPhase(highlighters)); registerFetchSubPhase(new FetchScorePhase()); + // Store highlighters in a static map for other plugins to access + staticHighlighters = Map.copyOf(highlighters); + FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters); registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase); } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 3efbcd15140e5..9ae3a1349510e 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -114,7 +114,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings(); - Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") + Encoder encoder = "html".equals(fieldContext.field.fieldOptions().encoder()) ? 
HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java new file mode 100644 index 0000000000000..bb7cf4ba0e675 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.Query; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.io.IOException; +import java.util.List; + +/** + * Utility class for building highlighting queries for the purpose of extracting snippets. + */ +public class HighlightSnippetUtils { + + public static SearchHighlightContext buildSearchHighlightContextForSnippets( + SearchExecutionContext searchExecutionContext, + String field, + int numSnippets, + int snippetCharLength, + QueryBuilder queryBuilder + ) throws IOException { + SearchHighlightContext.Field highlightField = buildFieldHighlightContextForSnippets( + searchExecutionContext, + field, + numSnippets, + snippetCharLength, + queryBuilder.toQuery(searchExecutionContext) + ); + return new SearchHighlightContext(List.of(highlightField)); + } + + public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets( + SearchExecutionContext searchExecutionContext, + String fieldName, + int numSnippets, + int snippetCharLength, + Query query + ) { + SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); + optionsBuilder.numberOfFragments(numSnippets); + optionsBuilder.fragmentCharSize(snippetCharLength); + optionsBuilder.noMatchSize(snippetCharLength); + optionsBuilder.preTags(new String[] { "" }); + optionsBuilder.postTags(new String[] { "" }); + optionsBuilder.requireFieldMatch(false); + optionsBuilder.scoreOrdered(true); + optionsBuilder.highlightQuery(query); + return new SearchHighlightContext.Field(fieldName, optionsBuilder.build()); + } + +} diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java index a85ae92c24bcf..111805be5b905 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java @@ -185,16 +185,16 @@ public Map options() { return options; } - static class Builder { + public static class Builder { private final FieldOptions fieldOptions = new FieldOptions(); - Builder fragmentCharSize(int fragmentCharSize) { + public Builder fragmentCharSize(int fragmentCharSize) { fieldOptions.fragmentCharSize = fragmentCharSize; return this; } - Builder numberOfFragments(int numberOfFragments) { + public Builder 
numberOfFragments(int numberOfFragments) { fieldOptions.numberOfFragments = numberOfFragments; return this; } @@ -209,17 +209,17 @@ Builder encoder(String encoder) { return this; } - Builder preTags(String[] preTags) { + public Builder preTags(String[] preTags) { fieldOptions.preTags = preTags; return this; } - Builder postTags(String[] postTags) { + public Builder postTags(String[] postTags) { fieldOptions.postTags = postTags; return this; } - Builder scoreOrdered(boolean scoreOrdered) { + public Builder scoreOrdered(boolean scoreOrdered) { fieldOptions.scoreOrdered = scoreOrdered; return this; } @@ -229,7 +229,7 @@ Builder highlightFilter(boolean highlightFilter) { return this; } - Builder requireFieldMatch(boolean requireFieldMatch) { + public Builder requireFieldMatch(boolean requireFieldMatch) { fieldOptions.requireFieldMatch = requireFieldMatch; return this; } @@ -269,7 +269,7 @@ Builder boundaryScannerLocale(Locale boundaryScannerLocale) { return this; } - Builder highlightQuery(Query highlightQuery) { + public Builder highlightQuery(Query highlightQuery) { fieldOptions.highlightQuery = highlightQuery; return this; } @@ -294,7 +294,7 @@ Builder options(Map options) { return this; } - FieldOptions build() { + public FieldOptions build() { return fieldOptions; } diff --git a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java index 7d018a7ef4ba9..cb3ddb7deb5cc 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -28,6 +28,7 @@ import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.search.RescoreDocIds; import org.elasticsearch.search.SearchExtBuilder; +import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.collapse.CollapseContext; @@ -40,6 +41,7 @@ import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.fetch.subphase.InnerHitsContext; import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.search.profile.Profilers; @@ -152,6 +154,10 @@ public final boolean isClosed() { public abstract void highlight(SearchHighlightContext highlight); + public Map highlighters() { + return SearchModule.getStaticHighlighters(); + } + public InnerHitsContext innerHits() { if (innerHitsContext == null) { innerHitsContext = new InnerHitsContext(); diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java index f21ed72d7eb21..5504e48d74636 100644 --- a/x-pack/plugin/esql/compute/src/main/java/module-info.java +++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java @@ -21,6 +21,7 @@ requires org.elasticsearch.geo; requires org.elasticsearch.xcore; requires hppc; + requires org.apache.lucene.highlighter; exports org.elasticsearch.compute; exports org.elasticsearch.compute.aggregation; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java new file mode 100644 index 0000000000000..6a788d541463e --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -0,0 +1,219 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.fetch.FetchContext; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.xcontent.Text; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +import static org.elasticsearch.core.RefCounted.ALWAYS_REFERENCED; + +public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator + implements + EvalOperator.ExpressionEvaluator { + + private final String fieldName; + private final int numFragments; + private final int fragmentLength; + private final Map highlighters; + private final FetchContext fetchContext; + private final MappedFieldType fieldType; + + HighlighterExpressionEvaluator( + BlockFactory blockFactory, + ShardConfig[] shardConfigs, + String fieldName, + Integer numFragments, + Integer fragmentLength, + SearchContext searchContext, + Map highlighters + ) { + super(blockFactory, shardConfigs); + this.fieldName = fieldName; + this.numFragments = numFragments != null ? 
numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS; + this.fragmentLength = fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE; + this.highlighters = highlighters; + + // Create a source loader for highlighter use + SourceLoader sourceLoader = searchContext.newSourceLoader(null); + fetchContext = new FetchContext(searchContext, sourceLoader); + SearchExecutionContext searchExecutionContext = searchContext.getSearchExecutionContext(); + if (searchExecutionContext == null) { + throw new IllegalStateException("SearchExecutionContext not found"); + } + fieldType = searchExecutionContext.getFieldType(fieldName); + } + + @Override + protected ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + @Override + protected Block createNoMatchBlock(BlockFactory blockFactory, int size) { + return blockFactory.newConstantNullBlock(size); + } + + @Override + protected BytesRefBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) { + return blockFactory.newBytesRefBlockBuilder(size * numFragments); + } + + @Override + protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { + + // TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes? + + SearchHit searchHit = new SearchHit(docId, null, null, ALWAYS_REFERENCED); + Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); + Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter()); + + SearchHighlightContext.Field field = HighlightSnippetUtils.buildFieldHighlightContextForSnippets( + fetchContext.getSearchExecutionContext(), + fieldName, + numFragments, + fragmentLength, + query + ); + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null); + FieldHighlightContext highlightContext = new FieldHighlightContext( + fieldName, + field, + fieldType, + fetchContext, + hitContext, + query, + new HashMap<>() + ); + HighlightField highlight = highlighter.highlight(highlightContext); + + if (highlight != null) { + boolean multivalued = highlight.fragments().length > 1; + if (multivalued) { + builder.beginPositionEntry(); + } + for (Text highlightText : highlight.fragments()) { + byte[] highlightBytes = highlightText.bytes().bytes(); + if (highlightBytes.length > fragmentLength) { + // TODO - Figure out a better way to construct BytesRef + // This isn't a great solution, but in order to resolve character encoding issues in the + // returned BytesRef we need to ensure that the fragment size we return is equal to what was requested. + // Since the highlighter's default sentence boundary scanner can return longer fragments, we're truncating for now. 
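+                        // fragmentLength is a character count for the highlighter, but it is applied below as a UTF-8 byte
+                        // budget, so multi-byte text can come back slightly shorter than the requested length.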
+ byte[] truncatedBytes = truncateUtf8(highlightBytes, fragmentLength); + builder.appendBytesRef(new BytesRef(truncatedBytes)); + } else { + builder.appendBytesRef(new BytesRef(highlightBytes)); + } + } + if (multivalued) { + builder.endPositionEntry(); + } + } + } + + private static byte[] truncateUtf8(byte[] bytes, int maxLength) throws CharacterCodingException { + if (bytes.length <= maxLength) return bytes; + + CharsetDecoder dec = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.IGNORE) + .onUnmappableCharacter(CodingErrorAction.IGNORE); + + CharBuffer chars = dec.decode(ByteBuffer.wrap(bytes, 0, maxLength)); + String trimmed = chars.toString().trim(); + ByteBuffer out = StandardCharsets.UTF_8.encode(trimmed); + + byte[] result = new byte[out.remaining()]; + out.get(result); + return result; + } + + private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) { + return () -> { + StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet()); + try { + LeafStoredFieldLoader leafRootLoader = rootLoader.getLoader(ctx, null); + leafRootLoader.advanceTo(doc); + return Source.fromBytes(leafRootLoader.source()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + } + + @Override + protected void appendNoMatch(BytesRefBlock.Builder builder) { + builder.appendNull(); + } + + @Override + public Block eval(Page page) { + return executeQuery(page); + } + + public record Factory( + ShardConfig[] shardConfigs, + String fieldName, + Integer numFragments, + Integer fragmentSize, + SearchContext searchContext, + Map highlighters + ) implements EvalOperator.ExpressionEvaluator.Factory { + @Override + public EvalOperator.ExpressionEvaluator get(DriverContext context) { + return new HighlighterExpressionEvaluator( + context.blockFactory(), + shardConfigs, + fieldName, + numFragments, + fragmentSize, + searchContext, + highlighters + ); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index c7f187c6c4a8f..ad05d27b8f42d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -17,7 +17,6 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.DocBlock; @@ -267,7 +266,7 @@ Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOE scoreBuilder, ctx, LuceneQueryEvaluator.this::appendNoMatch, - LuceneQueryEvaluator.this::appendMatch, + (builder, scorer1, docId, ctc, query) -> LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query), weight.getQuery() ) ) { @@ -310,12 +309,12 @@ private void initScorer(int minDocId) throws IOException { private void scoreSingleDocWithScorer(T builder, int doc) throws IOException { if (scorer.iterator().docID() == doc) { - appendMatch(builder, scorer); + appendMatch(builder, scorer, doc, ctx, weight.getQuery()); } else if (scorer.iterator().docID() > doc) { appendNoMatch(builder); } else { if (scorer.iterator().advance(doc) == doc) { - appendMatch(builder, scorer); + 
appendMatch(builder, scorer, doc, ctx, weight.getQuery()); } else { appendNoMatch(builder); } @@ -323,6 +322,11 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException { } } + @FunctionalInterface + public interface MatchAppender { + void accept(T t, U u, int docId, LeafReaderContext leafReaderContext, Query query) throws E; + } + /** * Collects matching information for dense range of doc ids. This assumes that * doc ids are sent to {@link LeafCollector#collect(int)} in ascending order @@ -333,7 +337,7 @@ static class DenseCollector implements LeafCollector, R private final int max; private final LeafReaderContext leafReaderContext; private final Consumer appendNoMatch; - private final CheckedBiConsumer appendMatch; + private final MatchAppender appendMatch; private final Query query; private Scorable scorer; @@ -345,7 +349,7 @@ static class DenseCollector implements LeafCollector, R U scoreBuilder, LeafReaderContext leafReaderContext, Consumer appendNoMatch, - CheckedBiConsumer appendMatch, + MatchAppender appendMatch, Query query ) { this.scoreBuilder = scoreBuilder; @@ -367,7 +371,7 @@ public void collect(int doc) throws IOException { while (next++ < doc) { appendNoMatch.accept(scoreBuilder); } - appendMatch.accept(scoreBuilder, scorer); + appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query); } public Block build() { @@ -405,7 +409,8 @@ public void close() { /** * Appends a matching result to a builder created by @link createVectorBuilder} */ - protected abstract void appendMatch(T builder, Scorable scorer) throws IOException; + protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException; /** * Appends a non matching result to a builder created by @link createVectorBuilder} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java index 814ecaa577238..c249620060685 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; @@ -58,7 +59,8 @@ protected void appendNoMatch(BooleanBlock.Builder builder) { } @Override - protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer) throws IOException { + protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { builder.appendBoolean(true); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java index 9c6db6b0bdc63..88b5721a6fdf9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.LeafReaderContext; 
import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; @@ -60,7 +61,8 @@ protected void appendNoMatch(DoubleBlock.Builder builder) { } @Override - protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer) throws IOException { + protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { builder.appendDouble(scorer.score()); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java index 6042a3c8cca5f..616679669b46f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java @@ -29,7 +29,7 @@ protected DenseCollector createDenseCollector(int min, int blockFactory().newBooleanBlockBuilder(max - min + 1), null, b -> b.appendBoolean(false), - (b, s) -> b.appendBoolean(true), + (b, s, d, lr, q) -> b.appendBoolean(true), null ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java index ba075ac98feb8..af162db91978f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java @@ -33,7 +33,7 @@ protected LuceneQueryEvaluator.DenseCollector createDenseCo blockFactory().newDoubleBlockBuilder(max - min + 1), null, b -> b.appendDouble(NO_MATCH_SCORE), - (b, s) -> b.appendDouble(s.score()), + (b, s, d, lr, q) -> b.appendDouble(s.score()), null ); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec new file mode 100644 index 0000000000000..d432b3c4da377 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec @@ -0,0 +1,139 @@ +############################################### +# Tests for ExtractSnippets function +# + +extractSnippetsWithField +required_capability: extract_snippets_function + +// tag::extract-snippets-with-field[] +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +// end::extract-snippets-with-field[] +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 1 +; + +// tag::extract-snippets-with-field-result[] +book_no:keyword | author:text | title:text | snippets:keyword +1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th +// end::extract-snippets-with-field-result[] +; + +extractSnippetsWithMatch +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 1, 50) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. 
Tolkien | Realms of Tolkien: Images of Middle-earth | is accompanied by appropriate passage from The Hob +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | Tolkien, beloved author of THE HOBBIT. +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | This beautiful gift edition of The Hobbit, J.R.R. +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | them all - which has fallen into the hands of the +; + +extractMultipleSnippetsWithMatch +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3, 25) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of, Tolkien's own children, T] +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Ho, , THE HOBBIT: AN UNEXPECT, film adaptation of The Ho] +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hob +; + + +extractMultipleSnippetsWithMatchMvExpand +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3, 25) +| MV_EXPAND snippets +| KEEP book_no, author, title, snippets +| SORT snippets +| LIMIT 9 +; + +book_no:keyword | author:text | title:text | snippets:keyword +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | , THE HOBBIT: AN UNEXPECT +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit +6760 | J. R. R. Tolkien | Roverandom | By the author of The Hobb +7350 | [Christopher Tolkien, John Ronald Reuel Tolkien] | Return of the Shadow | The character of the hobb +4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's Hobbit poems in +4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's acclaimed The H +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, T +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from +2675 | J.R.R. 
Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of +; + +extractMultipleSnippetsWithSomeNoMatches +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(author, "Faulkner") +| EVAL snippets = extract_snippets(description, "slavery", 1, 25) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | , and the abolition of sl +2713 | William Faulkner | Collected Stories of William Faulkner | null +2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null +2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null +3293 | Danny Faulkner | Universe by Design | null +; + +extractSnippetsWithDefaultNumSnippetsAndLength + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit") +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | of The Hob +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | in The Hob +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob +; + +extractSnippetsWithDefaultLength + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [of The Hob, Baggins is, children,] +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [in The Hob, , THE HOBB, of The Hob] +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java new file mode 100644 index 0000000000000..d4a99d18d63ef --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java @@ -0,0 +1,193 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.client.internal.IndicesAdminClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.junit.Before; + +import java.util.Collections; +import java.util.List; +import java.util.function.Consumer; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class ExtractSnippetsFunctionIT extends AbstractEsqlIntegTestCase { + + private static final List EMPTY_RESULT = Collections.singletonList(null); + + @Before + public void setupIndex() { + createAndPopulateIndex(this::ensureYellow); + } + + public void testExtractSnippets() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of(List.of("The quick brown"), List.of("This is a brown"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) + ); + } + } + + public void testExtractMultipleSnippets() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of( + List.of(List.of("The quick brown", "Afterward, the")), + List.of(List.of("This is a brown", "Sometimes the b")), + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT + ) + ); + } + } + + public void testExtractSnippetsWithMatch() { + var query = """ + FROM test METADATA _score + | WHERE MATCH(content, "fox") + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues(resp.values(), List.of(List.of("The quick brown"), List.of("This is a brown"))); + } + } + + public void testExtractMultipleSnippetsWithMatch() { + var query = """ + FROM test METADATA _score + | WHERE MATCH(content, "fox") + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of(List.of(List.of("The quick brown", "Afterward, the")), List.of(List.of("This is a brown", "Sometimes the b"))) + ); + } + } + + public void testExtractSnippetDefaults() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox") + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of(List.of("is a brown"), List.of("quick brow"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) + ); + } + } + + public void 
testExtractSnippetDefaultLength() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 3) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of( + List.of(List.of("is a brown", "the brown")), + List.of(List.of("quick brow", "the brown")), + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT + ) + ); + } + } + + static void createAndPopulateIndex(Consumer ensureYellow) { + var indexName = "test"; + var client = client().admin().indices(); + var createRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(createRequest); + client().prepareBulk().add(new IndexRequest(indexName).id("1").source("id", 1, "content", """ + This is a brown fox that likes to run through the meadow. + Sometimes the brown fox pauses to look around before continuing. + """)).add(new IndexRequest(indexName).id("2").source("id", 2, "content", """ + This is a brown dog that spends most of the day sleeping in the yard. + The brown dog occasionally wakes up to bark at the mailman. + """)).add(new IndexRequest(indexName).id("3").source("id", 3, "content", """ + This dog is really brown and enjoys chasing sticks near the river. + People often comment on how brown the dog looks in the sunlight. + """)).add(new IndexRequest(indexName).id("4").source("id", 4, "content", """ + The quick brown fox jumps over the lazy dog whenever it feels playful. + Afterward, the brown fox runs off into the forest. + """)).add(new IndexRequest(indexName).id("5").source("id", 5, "content", """ + There is also a white cat that prefers to sit quietly by the window. + Unlike the other animals, the white cat ignores everything around it. + """)).add(new IndexRequest(indexName).id("6").source("id", 6, "content", """ + The dog is brown but this document is very very long, filled with many words describing the scene. + Even so, the brown dog is still the main focus of the story. + """)).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); + + var lookupIndexName = "test_lookup"; + createAndPopulateLookupIndex(client, lookupIndexName); + + ensureYellow.accept(new String[] { indexName, lookupIndexName }); + } + + static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) { + var createRequest = client.prepareCreate(lookupIndexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup")) + .setMapping("id", "type=integer", "lookup_content", "type=text"); + assertAcked(createRequest); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index cdef9f8c33cbd..df95677b90f24 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1384,7 +1384,12 @@ public enum Cap { /** * Support for vector Hamming distance. */ - HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()); + HAMMING_VECTOR_SIMILARITY_FUNCTION(Build.current().isSnapshot()), + + /** + * Support for the EXTRACT_SNIPPETS function. 
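+     * Gated to snapshot builds while the function is under development.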
+ */ + EXTRACT_SNIPPETS_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 311f666581279..a8b01a749f1a0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -75,6 +75,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length; import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim; @@ -223,6 +224,7 @@ public static List unaryScalars() { entries.add(WildcardLike.ENTRY); entries.add(WildcardLikeList.ENTRY); entries.add(Delay.ENTRY); + entries.add(ExtractSnippets.ENTRY); // mv functions entries.addAll(MvFunctionWritables.getNamedWriteables()); return entries; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 9d6372702d842..0ff9c5cc0a5b5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -163,6 +163,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; @@ -509,7 +510,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(L1Norm.class, L1Norm::new, "v_l1_norm"), def(L2Norm.class, L2Norm::new, "v_l2_norm"), def(Magnitude.class, Magnitude::new, "v_magnitude"), - def(Hamming.class, Hamming::new, "v_hamming") } }; + def(Hamming.class, Hamming::new, "v_hamming"), + def(ExtractSnippets.class, quad(ExtractSnippets::new), "extract_snippets") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java new file mode 100644 index 0000000000000..6eb531356de4b --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -0,0 +1,356 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator; +import org.elasticsearch.compute.lucene.LuceneQueryEvaluator; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.Rewriteable; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.Check; +import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; +import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; +import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import org.elasticsearch.xpack.esql.querydsl.query.MatchQuery; +import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.fieldAsFieldAttribute; +import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.getNameFromFieldAttribute; + +/** + * Extract snippets function, that extracts the most relevant snippets from 
a given input string + */ +public class ExtractSnippets extends EsqlScalarFunction + implements + TwoOptionalArguments, + RewriteableAware, + TranslationAware, + EvaluatorMapper { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "ExtractSnippets", + ExtractSnippets::new + ); + + private static final int DEFAULT_NUM_SNIPPETS = 1; + // TODO: Determine good default, set artificially low for POC purposes + private static final int DEFAULT_SNIPPET_LENGTH = 10; + + private final Expression field, str, numSnippets, snippetLength; + private final QueryBuilder queryBuilder; + + @FunctionInfo( + returnType = "keyword", + preview = true, + description = """ + Extracts the most relevant snippets to return from a given input string.""", + examples = { + @Example(file = "extract-snippets-function", tag = "extract-snippets-with-field", applies_to = "stack: preview 9.2.0") } + ) + public ExtractSnippets( + Source source, + @Param(name = "field", type = { "keyword", "text" }, description = "The input string") Expression field, + @Param(name = "str", type = { "keyword" }, description = "The input string") Expression str, + @Param( + optional = true, + name = "num_snippets", + type = { "integer" }, + description = "The number of snippets to return. Defaults to " + DEFAULT_NUM_SNIPPETS + ) Expression numSnippets, + @Param( + optional = true, + name = "snippet_length", + type = { "integer" }, + description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH + ) Expression snippetLength + ) { + this(source, field, str, numSnippets, snippetLength, new MatchQueryBuilder(field.sourceText(), str.sourceText())); + + } + + public ExtractSnippets( + Source source, + Expression field, + Expression str, + Expression numSnippets, + Expression snippetLength, + QueryBuilder queryBuilder + ) { + super(source, fields(field, str, numSnippets, snippetLength)); + this.field = field; + this.str = str; + this.numSnippets = numSnippets; + this.snippetLength = snippetLength; + this.queryBuilder = queryBuilder; + }; + + public ExtractSnippets(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteable(Expression.class), + in.readNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(QueryBuilder.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field); + out.writeNamedWriteable(str); + out.writeOptionalNamedWriteable(numSnippets); + out.writeOptionalNamedWriteable(snippetLength); + out.writeOptionalNamedWriteable(queryBuilder); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return field.dataType().noText(); + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(field(), sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + + resolution = isString(str(), sourceText(), SECOND); + if (resolution.unresolved()) { + return resolution; + } + + resolution = numSnippets() == null + ? 
TypeResolution.TYPE_RESOLVED + : isType(numSnippets(), dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer"); + if (resolution.unresolved()) { + return resolution; + } + + return snippetLength() == null + ? TypeResolution.TYPE_RESOLVED + : isType(snippetLength(), dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer"); + } + + @Override + public boolean foldable() { + return field().foldable() + && str().foldable() + && (numSnippets() == null || numSnippets().foldable()) + && (snippetLength() == null || snippetLength().foldable()); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new ExtractSnippets( + source(), + newChildren.get(0), // field + newChildren.get(1), // str + numSnippets == null ? null : newChildren.get(2), + snippetLength == null ? null : newChildren.get(3), + queryBuilder + ); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength, queryBuilder); + } + + @Override + public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + List shardContexts = toEvaluator.shardContexts(); + LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; + + int numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); + int snippetSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small()); + + int i = 0; + for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { + SearchExecutionContext searchExecutionContext = shardContext.searchExecutionContext(); + SearchContext searchContext = shardContext.searchContext(); + if (searchContext == null) { + throw new IllegalStateException("Missing search context, cannot extract snippets"); + } + + try { + // We need to call rewrite here, to ensure we rewrite on both coordinator and data nodes. + assert queryBuilder != null : "ExtractSnippets missing required state"; + QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); + SearchHighlightContext highlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets( + searchExecutionContext, + fieldName(), + numSnippets, + snippetSize, + rewritten + ); + searchContext.highlight(highlightContext); + + } catch (IOException e) { + throw new RuntimeException( + "Failed to create highlight context for field [" + + fieldName() + + "], str [" + + searchString() + + "], numSnippets: [" + + numSnippets + + "], snippetLength: [" + + snippetLength + + "]", + e + ); + } + + shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); + } + // Get field name and search context from the first shard context + SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.getFirst().searchContext(); + Map highlighters = firstSearchContext == null ? 
+        Map<String, Highlighter> highlighters = firstSearchContext == null ? Map.of() : firstSearchContext.highlighters();
+        return new HighlighterExpressionEvaluator.Factory(
+            shardConfigs,
+            fieldName(),
+            numSnippets,
+            snippetSize,
+            firstSearchContext,
+            highlighters
+        );
+    }
+
+    @Override
+    public QueryBuilder queryBuilder() {
+        return queryBuilder;
+    }
+
+    @Override
+    public Expression replaceQueryBuilder(QueryBuilder queryBuilder) {
+        return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder);
+    }
+
+    @Override
+    public Translatable translatable(LucenePushdownPredicates pushdownPredicates) {
+        // We don't want pushdown for this function, as it is not a filter query
+        return Translatable.NO;
+    }
+
+    @Override
+    public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
+        return queryBuilder != null
+            ? new TranslationAwareExpressionQuery(source(), queryBuilder())
+            : translate(pushdownPredicates, handler);
+    }
+
+    private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) {
+        Object query = str().fold(FoldContext.small());
+        // Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided
+        return new MatchQuery(source(), fieldName(), query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true));
+    }
+
+    Expression field() {
+        return field;
+    }
+
+    private String fieldName() {
+        var fieldAttribute = fieldAsFieldAttribute(field());
+        Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument");
+        return getNameFromFieldAttribute(fieldAttribute);
+    }
+
+    Expression str() {
+        return str;
+    }
+
+    private String searchString() {
+        var strAttribute = fieldAsFieldAttribute(str());
+        Check.notNull(strAttribute, "Highlight must have a str attribute as the second argument");
+        return getNameFromFieldAttribute(strAttribute);
+    }
+
+    Expression numSnippets() {
+        return numSnippets;
+    }
+
+    Expression snippetLength() {
+        return snippetLength;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == null || getClass() != o.getClass()) return false;
+        ExtractSnippets extractSnippets = (ExtractSnippets) o;
+        return Objects.equals(field(), extractSnippets.field())
+            && Objects.equals(str(), extractSnippets.str())
+            && Objects.equals(numSnippets(), extractSnippets.numSnippets())
+            && Objects.equals(snippetLength(), extractSnippets.snippetLength())
+            && Objects.equals(queryBuilder(), extractSnippets.queryBuilder());
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(field(), str(), numSnippets(), snippetLength(), queryBuilder());
+    }
+
+    private static List<Expression> fields(Expression field, Expression str, Expression numSnippets, Expression snippetLength) {
+        List<Expression> list = new ArrayList<>(4);
+        list.add(field);
+        list.add(str);
+        if (numSnippets != null) {
+            list.add(numSnippets);
+            if (snippetLength != null) {
+                list.add(snippetLength);
+            }
+        }
+        return list;
+    }
+}
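The `fields(...)` helper above, together with `replaceChildren(...)`, encodes the two optional arguments positionally: `snippet_length` is only added when `num_snippets` is present, so the children list is always a prefix of `[field, str, num_snippets, snippet_length]`. A standalone sketch of that convention (plain Java with illustrative names, not code from this change):

```java
import java.util.ArrayList;
import java.util.List;

// Standalone illustration of the optional-argument packing used by fields()/replaceChildren():
// snippet_length can only be present when num_snippets is, so the children are always a prefix
// of [field, str, num_snippets, snippet_length] and positional indexes stay stable.
public class OptionalChildrenSketch {
    static List<String> children(String field, String str, String numSnippets, String snippetLength) {
        List<String> list = new ArrayList<>(4);
        list.add(field);
        list.add(str);
        if (numSnippets != null) {
            list.add(numSnippets);
            if (snippetLength != null) {
                list.add(snippetLength);
            }
        }
        return list;
    }

    public static void main(String[] args) {
        System.out.println(children("description", "query", null, null)); // [description, query]
        System.out.println(children("description", "query", "2", null));  // [description, query, 2]
        System.out.println(children("description", "query", "2", "25"));  // [description, query, 2, 25]
    }
}
```

Because the list is a strict prefix, `replaceChildren` can map indexes 2 and 3 back to `num_snippets` and `snippet_length` without any extra bookkeeping.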
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
index 5b32cdbbacdc9..6b6eec8004c87 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
@@ -59,6 +59,7 @@
 import org.elasticsearch.logging.Logger;
 import org.elasticsearch.search.fetch.StoredFieldsSpec;
 import org.elasticsearch.search.internal.AliasFilter;
+import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.lookup.SearchLookup;
 import org.elasticsearch.search.sort.SortAndFormats;
 import org.elasticsearch.search.sort.SortBuilder;
@@ -109,6 +110,10 @@ protected void closeInternal() {
             }
         };
 
+        public abstract SearchExecutionContext searchExecutionContext();
+
+        public abstract SearchContext searchContext();
+
         @Override
         public void incRef() {
             refCounted.incRef();
@@ -428,6 +433,19 @@ public DefaultShardContext(int index, Releasable releasable, SearchExecutionCont
            this.shardIdentifier = this.ctx.getFullyQualifiedIndex().getName() + ":" + this.ctx.getShardId();
        }
 
+        @Override
+        public SearchExecutionContext searchExecutionContext() {
+            return ctx;
+        }
+
+        @Override
+        public SearchContext searchContext() {
+            if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) {
+                return searchContext;
+            }
+            return null;
+        }
+
         @Override
         public int index() {
             return index;
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
index 869a851a1fb34..23d5468c7fc15 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
@@ -335,6 +335,10 @@ public final void test() throws Throwable {
             "CSV tests cannot currently handle multi_match function that depends on Lucene",
             testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.capabilityName())
         );
+        assumeFalse(
+            "CSV tests cannot currently handle EXTRACT_SNIPPETS",
+            testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.EXTRACT_SNIPPETS_FUNCTION.capabilityName())
+        );
 
         if (Build.current().isSnapshot()) {
             assertThat(
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
index e55a1b039258e..c87cc11306b13 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java
@@ -18,6 +18,7 @@
 import org.elasticsearch.index.query.BoolQueryBuilder;
 import org.elasticsearch.index.query.ExistsQueryBuilder;
 import org.elasticsearch.index.query.MatchAllQueryBuilder;
+import org.elasticsearch.index.query.MatchQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.RangeQueryBuilder;
 import org.elasticsearch.index.query.RegexpQueryBuilder;
@@ -113,6 +114,7 @@ public static NamedWriteableRegistry writableRegistry() {
         entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, RegexpQueryBuilder.NAME, RegexpQueryBuilder::new));
         entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, ExistsQueryBuilder.NAME, ExistsQueryBuilder::new));
         entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, KnnVectorQueryBuilder.NAME, KnnVectorQueryBuilder::new));
+        entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, MatchQueryBuilder.NAME, MatchQueryBuilder::new));
         entries.add(SingleValueQuery.ENTRY);
         entries.addAll(ExpressionWritables.getNamedWriteables());
         entries.addAll(PlanWritables.getNamedWriteables());
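The `MatchQueryBuilder` registration above matters because the serialization round-trip in these tests resolves readers by writeable name: any query builder the expression embeds must be registered or deserialization fails. A toy, self-contained sketch of that lookup-by-name failure mode (plain Java, not the Elasticsearch `NamedWriteableRegistry` API):

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Toy illustration (not the Elasticsearch API): deserialization looks readers up by name,
// so a writeable that is not registered cannot be read back after serialization.
public class RegistrySketch {
    static final Map<String, Function<String, Object>> READERS = new HashMap<>();

    static Object roundTrip(String name, String payload) {
        Function<String, Object> reader = READERS.get(name);
        if (reader == null) {
            throw new IllegalStateException("unknown named writeable [" + name + "]");
        }
        return reader.apply(payload);
    }

    public static void main(String[] args) {
        READERS.put("match", payload -> "MatchQueryBuilder(" + payload + ")");
        System.out.println(roundTrip("match", "description:crowning achievement")); // succeeds
        try {
            roundTrip("knn", "..."); // never registered
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage()); // unknown named writeable [knn]
        }
    }
}
```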
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java
new file mode 100644
index 0000000000000..da5e85a0dff98
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.expression.function.scalar.string;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase;
+import org.elasticsearch.xpack.esql.expression.function.FunctionName;
+import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
+import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.stringCases;
+import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER;
+import static org.hamcrest.Matchers.equalTo;
+
+@FunctionName("extract_snippets")
+public class ExtractSnippetsTests extends AbstractFunctionTestCase {
+
+    public ExtractSnippetsTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
+        this.testCase = testCaseSupplier.get();
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() {
+        return parameterSuppliersFromTypedData(testCaseSuppliers());
+    }
+
+    private static List<TestCaseSupplier> testCaseSuppliers() {
+        List<TestCaseSupplier> suppliers = new ArrayList<>();
+        addStringTestCases(suppliers);
+        return suppliers;
+    }
+
+    public static void addStringTestCases(List<TestCaseSupplier> suppliers) {
+        for (DataType fieldType : DataType.stringTypes()) {
+            if (DataType.UNDER_CONSTRUCTION.containsKey(fieldType)) {
+                continue;
+            }
+            for (TestCaseSupplier.TypedDataSupplier queryDataSupplier : stringCases(fieldType)) {
+                suppliers.add(
+                    TestCaseSupplier.testCaseSupplier(
+                        queryDataSupplier,
+                        new TestCaseSupplier.TypedDataSupplier(fieldType.typeName(), () -> randomAlphaOfLength(10), DataType.KEYWORD),
+                        (d1, d2) -> equalTo("string"),
+                        DataType.KEYWORD,
+                        (o1, o2) -> true
+                    )
+                );
+            }
+        }
+    }
+
+    @Override
+    protected Expression build(Source source, List<Expression> args) {
+        ExtractSnippets extractSnippets = new ExtractSnippets(
+            source,
+            args.get(0),
+            args.get(1),
+            args.size() > 2 ? args.get(2) : null,
+            args.size() > 3 ? args.get(3) : null
+        );
+        // We need to add the QueryBuilder to the extract_snippets expression, as it is used to implement equals() and hashCode() and
+        // thus test the serialization methods. But we can only do this if the parameters make sense.
+        if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) {
+            QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, extractSnippets).toQueryBuilder();
+            extractSnippets = (ExtractSnippets) extractSnippets.replaceQueryBuilder(queryBuilder);
+        }
+        return extractSnippets;
+    }
+}
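One detail worth noting in `build(...)` above: `replaceQueryBuilder` returns a new expression rather than mutating the receiver, so its result has to be reassigned for the query builder to participate in `equals`/`hashCode` during serialization testing. A minimal standalone illustration of that immutable "replace" pattern (a hypothetical class, not the ESQL types):

```java
// Standalone sketch: replaceQueryBuilder()-style methods return a new instance, so discarding
// the return value leaves the original object unchanged.
public final class WitherSketch {
    private final String queryBuilder;

    public WitherSketch(String queryBuilder) {
        this.queryBuilder = queryBuilder;
    }

    public WitherSketch replaceQueryBuilder(String newQueryBuilder) {
        return new WitherSketch(newQueryBuilder); // no mutation of this instance
    }

    @Override
    public String toString() {
        return "WitherSketch[queryBuilder=" + queryBuilder + "]";
    }

    public static void main(String[] args) {
        WitherSketch original = new WitherSketch(null);
        original.replaceQueryBuilder("match");            // result discarded: original is unchanged
        WitherSketch updated = original.replaceQueryBuilder("match");
        System.out.println(original);                      // WitherSketch[queryBuilder=null]
        System.out.println(updated);                       // WitherSketch[queryBuilder=match]
    }
}
```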
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
index 8e55cc9c222b5..dda4d7d27f376 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType;
@@ -307,6 +308,8 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
             public void visitLeaf(Query query) {
                 if (query instanceof MatchAllDocsQuery) {
                     queries.add(new MatchAllDocsQuery());
-                }
+                } else if (query instanceof ToParentBlockJoinQuery toParentBlockJoinQuery) {
+                    queries.add(toParentBlockJoinQuery.getChildQuery());
+                }
             }
         });
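The `SemanticTextHighlighter` change above unwraps `ToParentBlockJoinQuery` leaves to their child query, so the highlighter can score against the query that actually matches the nested content rather than the parent-join wrapper. A plain-Java sketch of the unwrapping idea, using hypothetical query types rather than Lucene's:

```java
import java.util.ArrayList;
import java.util.List;

// Plain-Java sketch (hypothetical Query types, not Lucene's): when a leaf is a parent/child
// join wrapper, collect its child query instead of the wrapper.
public class UnwrapSketch {
    interface Query {}
    record TermQuery(String term) implements Query {}
    record BlockJoinQuery(Query childQuery) implements Query {}

    static List<Query> collectLeaves(List<Query> leaves) {
        List<Query> collected = new ArrayList<>();
        for (Query leaf : leaves) {
            if (leaf instanceof BlockJoinQuery join) {
                collected.add(join.childQuery()); // unwrap to the child query
            } else {
                collected.add(leaf);
            }
        }
        return collected;
    }

    public static void main(String[] args) {
        System.out.println(collectLeaves(List.of(new TermQuery("crowning"), new BlockJoinQuery(new TermQuery("achievement")))));
    }
}
```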
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
index 66fb4a366a757..5c3ae35f72ea2 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java
@@ -12,8 +12,8 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
 import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils;
 import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.rank.RankShardResult;
@@ -73,20 +73,17 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI
     public void prepareForFetch(SearchContext context) {
         if (snippetRankInput != null) {
             try {
-                HighlightBuilder highlightBuilder = new HighlightBuilder();
-                highlightBuilder.highlightQuery(snippetRankInput.snippetQueryBuilder());
-                // Stripping pre/post tags as they're not useful for snippet creation
-                highlightBuilder.field(field).preTags("").postTags("");
-                // Return highest scoring fragments
-                highlightBuilder.order(HighlightBuilder.Order.SCORE);
                 int numSnippets = snippetRankInput.numSnippets() != null ? snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS;
-                highlightBuilder.numOfFragments(numSnippets);
                 // Rely on the model to determine the fragment size
                 int tokenSizeLimit = snippetRankInput.tokenSizeLimit();
                 int fragmentSize = tokenSizeLimit * TOKEN_SIZE_LIMIT_MULTIPLIER;
-                highlightBuilder.fragmentSize(fragmentSize);
-                highlightBuilder.noMatchSize(fragmentSize);
-                SearchHighlightContext searchHighlightContext = highlightBuilder.build(context.getSearchExecutionContext());
+                SearchHighlightContext searchHighlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets(
+                    context.getSearchExecutionContext(),
+                    field,
+                    numSnippets,
+                    fragmentSize,
+                    snippetRankInput.snippetQueryBuilder()
+                );
                 context.highlight(searchHighlightContext);
             } catch (IOException e) {
                 throw new RuntimeException("Failed to generate snippet request", e);