From ee5601811c530216030d85bbc54657d534807b75 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 18 Jul 2025 10:38:39 -0400 Subject: [PATCH 01/44] Initial plumbing for an ES|QL extract_snippets function --- .../esql/expression/ExpressionWritables.java | 2 + .../function/EsqlFunctionRegistry.java | 4 +- .../scalar/string/ExtractSnippets.java | 221 ++++++++++++++++++ 3 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 311f666581279..a8b01a749f1a0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -75,6 +75,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMax; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StYMin; import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length; import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim; @@ -223,6 +224,7 @@ public static List unaryScalars() { entries.add(WildcardLike.ENTRY); entries.add(WildcardLikeList.ENTRY); entries.add(Delay.ENTRY); + entries.add(ExtractSnippets.ENTRY); // mv functions entries.addAll(MvFunctionWritables.getNamedWriteables()); return entries; diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0c2629596a9b4..590a40557bc03 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -159,6 +159,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; @@ -491,7 +492,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(StGeohex.class, StGeohex::new, "st_geohex"), def(StGeohexToLong.class, StGeohexToLong::new, "st_geohex_to_long"), def(StGeohexToString.class, StGeohexToString::new, "st_geohex_to_string"), - def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine") } }; + def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), + def(ExtractSnippets.class, ExtractSnippets::new, "extract_snippets") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java new file mode 100644 index 0000000000000..9066724cece1c --- /dev/null +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -0,0 +1,221 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.lucene.LuceneQueryEvaluator; +import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; +import static 
org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; + +/** + * Extract snippets function, that extracts the most relevant snippets from a given input string + */ +public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "ExtractSnippets", + ExtractSnippets::new + ); + + private static final int DEFAULT_NUM_SNIPPETS = 1; + private static final int DEFAULT_SNIPPET_LENGTH = 10; // TODO determine a good default. 512 * 5? + + // TODO better names? + private final Expression field, str, numSnippets, snippetLength; + + @FunctionInfo( + returnType = "keyword", + description = """ + Extracts the most relevant snippets to return from a given input string""", + examples = @Example(file = "keyword", tag = "extract_snippets") + ) + public ExtractSnippets( + Source source, + @Param(name = "field", type = { "keyword" }, description = "The input string") Expression field, + @Param(name = "str", type = { "keyword", "text" }, description = "The input string") Expression str, + @Param( + optional = true, + name = "num_snippets", + type = { "integer" }, + description = "The number of snippets to return. Defaults to " + DEFAULT_NUM_SNIPPETS + ) Expression numSnippets, + @Param( + optional = true, + name = "snippet_length", + type = { "integer" }, + description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH + ) Expression snippetLength + ) { + super(source, numSnippets == null ? 
Collections.singletonList(str) : Arrays.asList(str, numSnippets)); + this.field = field; + this.str = str; + this.numSnippets = numSnippets; + this.snippetLength = snippetLength; + } + + private ExtractSnippets(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + in.readNamedWriteable(Expression.class), + in.readNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(Expression.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field); + out.writeNamedWriteable(str); + out.writeOptionalNamedWriteable(numSnippets); + out.writeOptionalNamedWriteable(snippetLength); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return field.dataType().noText(); + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(field, sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + + resolution = isString(str, sourceText(), SECOND); + if (resolution.unresolved()) { + return resolution; + } + + resolution = numSnippets == null + ? TypeResolution.TYPE_RESOLVED + : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer"); + if (resolution.unresolved()) { + return resolution; + } + + return snippetLength == null + ? 
TypeResolution.TYPE_RESOLVED + : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer"); + } + + @Override + public boolean foldable() { + return field.foldable() + && str.foldable() + && (numSnippets == null || numSnippets.foldable()) + && (snippetLength == null || snippetLength.foldable()); + } + + // @Evaluator + // static BytesRef process(BytesRef field, BytesRef str, int numSnippets, int snippetLength) { + // if (field == null || field.length == 0 || str == null || str.length == 0) { + // return null; + // } + // + // String utf8Field = field.utf8ToString(); + // String utf8Str = str.utf8ToString(); + // if (snippetLength > utf8Field.length()) { + // return field; + // } + // + // // TODO - actually calculate snippets using search string, this truncation is just a placeholder + // List snippets = new ArrayList<>(numSnippets); + // int pos = 0; + // for (int i = 0; i < numSnippets && pos < utf8Field.length(); i++) { + // int end = Math.min(pos + snippetLength, utf8Field.length()); + // String snippet = utf8Field.substring(pos, end); + // snippets.add(snippet); + // pos += snippetLength; + // } + // return snippets.get(0); + // } + // + // @Evaluator(extraName = "NoStart") + // static BytesRef process(BytesRef field, BytesRef str) { + // return process(field, str, DEFAULT_NUM_SNIPPETS, DEFAULT_SNIPPET_LENGTH); + // } + + @Override + public Expression replaceChildren(List newChildren) { + return new ExtractSnippets( + source(), + newChildren.get(0), + newChildren.get(1), + numSnippets == null ? null : newChildren.get(1), + snippetLength == null ? 
null : newChildren.get(2) + ); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength); + } + + @Override + public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + List shardContexts = toEvaluator.shardContexts(); + LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; + int i = 0; + for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { + shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); + } + return new LuceneQueryExpressionEvaluator.Factory(shardConfigs); + } + + } + + Expression str() { + return str; + } + + Expression numSnippets() { + return numSnippets; + } + + Expression snippetLength() { + return snippetLength; + } +} From eb0a8769550939bce0444692c4e58fc2eaf6b0c5 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 29 Jul 2025 15:05:33 -0400 Subject: [PATCH 02/44] Add HighlighterExpressionEvaluator --- .../HighlighterExpressionEvaluator.java | 68 ++++++++++++++++++ .../scalar/string/ExtractSnippets.java | 70 +++++++------------ 2 files changed, 94 insertions(+), 44 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java new file mode 100644 index 0000000000000..0d2dff8bb6f0e --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; + +import java.io.IOException; + +public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator + implements + EvalOperator.ExpressionEvaluator { + + HighlighterExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shardConfigs) { + super(blockFactory, shardConfigs); + } + + @Override + protected ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + @Override + protected Vector createNoMatchVector(BlockFactory blockFactory, int size) { + return blockFactory.newConstantBytesRefVector(new BytesRef(), size); + } + + @Override + protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) { + return blockFactory.newBytesRefVectorBuilder(size); + } + + @Override + protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException { + // TODO: add snippets here + builder.appendBytesRef(new BytesRef("highlighted text")); // Placeholder for actual highlighted text + } + + @Override + protected void appendNoMatch(BytesRefVector.Builder builder) { + // TODO: No-op? 
+ } + + @Override + public Block eval(Page page) { + return executeQuery(page); + } + + public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { + @Override + public EvalOperator.ExpressionEvaluator get(DriverContext context) { + return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 9066724cece1c..6369cb7c0e641 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -10,10 +10,13 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator; import org.elasticsearch.compute.lucene.LuceneQueryEvaluator; -import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -26,15 +29,16 @@ import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collections; import java.util.List; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; @@ -119,73 +123,44 @@ protected TypeResolution resolveType() { return new TypeResolution("Unresolved children"); } - TypeResolution resolution = isString(field, sourceText(), FIRST); + TypeResolution resolution = isString(field(), sourceText(), FIRST); if (resolution.unresolved()) { return resolution; } - resolution = isString(str, sourceText(), SECOND); + resolution = isString(str(), sourceText(), SECOND); if (resolution.unresolved()) { return resolution; } - resolution = numSnippets == null + resolution = numSnippets() == null ? TypeResolution.TYPE_RESOLVED - : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer"); + : isType(numSnippets(), dt -> dt == DataType.INTEGER, sourceText(), THIRD, "integer"); if (resolution.unresolved()) { return resolution; } - return snippetLength == null + return snippetLength() == null ? 
TypeResolution.TYPE_RESOLVED - : isType(numSnippets, dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer"); + : isType(snippetLength(), dt -> dt == DataType.INTEGER, sourceText(), FOURTH, "integer"); } @Override public boolean foldable() { - return field.foldable() - && str.foldable() - && (numSnippets == null || numSnippets.foldable()) - && (snippetLength == null || snippetLength.foldable()); + return field().foldable() + && str().foldable() + && (numSnippets() == null || numSnippets().foldable()) + && (snippetLength() == null || snippetLength().foldable()); } - // @Evaluator - // static BytesRef process(BytesRef field, BytesRef str, int numSnippets, int snippetLength) { - // if (field == null || field.length == 0 || str == null || str.length == 0) { - // return null; - // } - // - // String utf8Field = field.utf8ToString(); - // String utf8Str = str.utf8ToString(); - // if (snippetLength > utf8Field.length()) { - // return field; - // } - // - // // TODO - actually calculate snippets using search string, this truncation is just a placeholder - // List snippets = new ArrayList<>(numSnippets); - // int pos = 0; - // for (int i = 0; i < numSnippets && pos < utf8Field.length(); i++) { - // int end = Math.min(pos + snippetLength, utf8Field.length()); - // String snippet = utf8Field.substring(pos, end); - // snippets.add(snippet); - // pos += snippetLength; - // } - // return snippets.get(0); - // } - // - // @Evaluator(extraName = "NoStart") - // static BytesRef process(BytesRef field, BytesRef str) { - // return process(field, str, DEFAULT_NUM_SNIPPETS, DEFAULT_SNIPPET_LENGTH); - // } - @Override public Expression replaceChildren(List newChildren) { return new ExtractSnippets( source(), + field, newChildren.get(0), - newChildren.get(1), numSnippets == null ? null : newChildren.get(1), - snippetLength == null ? 
null : newChildren.get(2) + snippetLength ); } @@ -202,9 +177,16 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); } - return new LuceneQueryExpressionEvaluator.Factory(shardConfigs); + return new HighlighterExpressionEvaluator.Factory(shardConfigs); + + } + + private QueryBuilder queryBuilder() { + return new MatchQueryBuilder(field.sourceText(), str.sourceText()); } + Expression field() { + return field; } Expression str() { From 8c0f312226b3c9eb8bae4865bd036ad63ae67442 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 30 Jul 2025 17:40:46 +0200 Subject: [PATCH 03/44] Pair programming session --- .../xpack/esql/plugin/ExtractSnippetsIT.java | 75 +++++++++++++++++++ .../scalar/string/ExtractSnippets.java | 8 +- .../xpack/esql/analysis/AnalyzerTests.java | 13 +++- 3 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java new file mode 100644 index 0000000000000..4b5affa2f6fc3 --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.client.internal.IndicesAdminClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.junit.Before; + +import java.util.List; +import java.util.function.Consumer; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class ExtractSnippetsIT extends AbstractEsqlIntegTestCase { + + @Before + public void setupIndex() { + createAndPopulateIndex(this::ensureYellow); + } + + public void testExtractSnippets() { + var query = """ + FROM test + | EVAL x = extract_snippets(content, "fox", 1, 10) + | KEEP x + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("x")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(6))); + } + } + + + + static void createAndPopulateIndex(Consumer ensureYellow) { + var indexName = "test"; + var client = client().admin().indices(); + var createRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(createRequest); + client().prepareBulk() + .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox")) + .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog")) + .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown")) + .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long")) + .add(new IndexRequest(indexName).id("5").source("id", 5, 
"content", "There is also a white cat")) + .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog")) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + + var lookupIndexName = "test_lookup"; + createAndPopulateLookupIndex(client, lookupIndexName); + + ensureYellow.accept(new String[] { indexName, lookupIndexName }); + } + + static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) { + var createRequest = client.prepareCreate(lookupIndexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup")) + .setMapping("id", "type=integer", "lookup_content", "type=text"); + assertAcked(createRequest); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 6369cb7c0e641..fc13767a01ceb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -81,7 +81,7 @@ public ExtractSnippets( description = "The length of snippets to return. Defaults to " + DEFAULT_SNIPPET_LENGTH ) Expression snippetLength ) { - super(source, numSnippets == null ? Collections.singletonList(str) : Arrays.asList(str, numSnippets)); + super(source, List.of(field, str, numSnippets, snippetLength)); this.field = field; this.str = str; this.numSnippets = numSnippets; @@ -157,10 +157,10 @@ && str().foldable() public Expression replaceChildren(List newChildren) { return new ExtractSnippets( source(), - field, newChildren.get(0), - numSnippets == null ? null : newChildren.get(1), - snippetLength + newChildren.get(1), + numSnippets == null ? 
null : newChildren.get(2), + newChildren.get(3) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 439e10cce27d4..32cfe6daaf92e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.LoadMapping; import org.elasticsearch.xpack.esql.VerificationException; @@ -143,7 +144,7 @@ import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; -//@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug") +@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug") public class AnalyzerTests extends ESTestCase { private static final UnresolvedRelation UNRESOLVED_RELATION = new UnresolvedRelation( @@ -2870,6 +2871,16 @@ public void testFromEnrichAndMatchColonUsage() { assertEquals(esRelation.indexPattern(), "test"); } + public void testSnippets() { + LogicalPlan plan = analyze(""" + from test + | EVAL x = extract_snippets(first_name, "text", 1, 10) + | KEEP x + """); + var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); + } + public void testFunctionNamedParamsAsFunctionArgument() { LogicalPlan plan = analyze(""" from test From 86dc82ae3efb62268e6e9b8bdfe7d908c15e77f8 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 30 Jul 2025 14:25:49 -0400 Subject: [PATCH 04/44] Create highlight query --- .../HighlighterExpressionEvaluator.java | 5 ++- 
.../scalar/string/ExtractSnippets.java | 14 +++++-- .../planner/EsPhysicalOperationProviders.java | 42 +++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 0d2dff8bb6f0e..f2a4e1d00c0d7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -46,12 +46,12 @@ protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, @Override protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException { // TODO: add snippets here - builder.appendBytesRef(new BytesRef("highlighted text")); // Placeholder for actual highlighted text + builder.appendBytesRef(new BytesRef("I am a snippet")); // Placeholder for actual highlighted text } @Override protected void appendNoMatch(BytesRefVector.Builder builder) { - // TODO: No-op? + builder.appendBytesRef(null); } @Override @@ -62,6 +62,7 @@ public Block eval(Page page) { public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) { + // TODO: Is it possible to add the highlight queyr here, rather than in ExtractSnippets? 
return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index fc13767a01ceb..e8644e94334c1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -15,6 +15,7 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -157,10 +158,10 @@ && str().foldable() public Expression replaceChildren(List newChildren) { return new ExtractSnippets( source(), - newChildren.get(0), - newChildren.get(1), + newChildren.get(0), // field + newChildren.get(1), // str numSnippets == null ? null : newChildren.get(2), - newChildren.get(3) + snippetLength == null ? 
null : newChildren.get(3) ); } @@ -175,6 +176,13 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { + shardContext.addHighlightQuery( + field.sourceText(), + str.sourceText(), + Integer.parseInt(numSnippets.sourceText()), + Integer.parseInt(snippetLength.sourceText()), + queryBuilder() + ); shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); } return new HighlighterExpressionEvaluator.Factory(shardConfigs); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index e0b570267899b..e37670f6bd3fc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -52,6 +52,8 @@ import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.search.fetch.StoredFieldsSpec; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.AliasFilter; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.sort.SortAndFormats; @@ -136,6 +138,8 @@ public boolean hasReferences() { * need one in ten documents. 
*/ public abstract double storedFieldsSequentialProportion(); + + public abstract void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder); } private final List shardContexts; @@ -498,6 +502,44 @@ public double storedFieldsSequentialProportion() { return EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.get(ctx.getIndexSettings().getSettings()); } + @Override + public void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder) { + try { + // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch + HighlightBuilder highlightBuilder = new HighlightBuilder(); + if (queryBuilder != null) { + highlightBuilder.highlightQuery(queryBuilder); + } + // Stripping pre/post tags as they're not useful for snippet creation + highlightBuilder.field(field).preTags("").postTags(""); + // Return highest scoring fragments + highlightBuilder.order(HighlightBuilder.Order.SCORE); + highlightBuilder.numOfFragments(numSnippets); + highlightBuilder.fragmentSize(snippetLength); + highlightBuilder.noMatchSize(snippetLength); + + SearchHighlightContext highlightContext = highlightBuilder.build(ctx); + + // Update the active SearchContext with the highlight context + if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) { + searchContext.highlight(highlightContext); + } + } catch (IOException e) { + throw new RuntimeException( + "Failed to create highlight context for field [" + + field + + "], str [" + + str + + "], numSnippets: [" + + numSnippets + + "], snippetLength: [" + + snippetLength + + "]", + e + ); + } + } + @Override public void close() { releasable.close(); From 4f4f157a05368c03cc87cf42410f0682167a349d Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 1 Aug 2025 13:20:15 -0400 Subject: [PATCH 05/44] Make extract snippets rewriteable --- 
.../HighlighterExpressionEvaluator.java | 6 +- .../esql/capabilities/RewriteableAware.java | 19 ++++++ .../function/EsqlFunctionRegistry.java | 2 +- .../function/fulltext/FullTextFunction.java | 7 +- .../fulltext/QueryBuilderResolver.java | 20 +++--- .../scalar/string/ExtractSnippets.java | 66 +++++++++++++++---- 6 files changed, 91 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index f2a4e1d00c0d7..aeaff93ac0a53 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -51,7 +51,8 @@ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) thro @Override protected void appendNoMatch(BytesRefVector.Builder builder) { - builder.appendBytesRef(null); + // NOTE: Carlos originally suggested that we add null here, but that doesn't work - errors on missing key + builder.appendBytesRef(new BytesRef()); } @Override @@ -62,7 +63,8 @@ public Block eval(Page page) { public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) { - // TODO: Is it possible to add the highlight queyr here, rather than in ExtractSnippets? + // TODO: Is it possible to add the highlight query here, rather than in ExtractSnippets? 
Would require ShardConfig having access + // to context return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java new file mode 100644 index 0000000000000..4c4a34b60e46e --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.capabilities; + +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.core.expression.Expression; + +public interface RewriteableAware { + + QueryBuilder queryBuilder(); + + Expression replaceQueryBuilder(QueryBuilder queryBuilder); + +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 590a40557bc03..45f01a0face86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -493,7 +493,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(StGeohexToLong.class, StGeohexToLong::new, "st_geohex_to_long"), def(StGeohexToString.class, StGeohexToString::new, "st_geohex_to_string"), def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), - def(ExtractSnippets.class, ExtractSnippets::new, "extract_snippets") } }; + 
def(ExtractSnippets.class, quad(ExtractSnippets::new), "extract_snippets") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index b5378db783f46..107bb4c14e4f6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -68,7 +69,8 @@ public abstract class FullTextFunction extends Function TranslationAware, PostAnalysisPlanVerificationAware, EvaluatorMapper, - ExpressionScoreMapper { + ExpressionScoreMapper, + RewriteableAware { private final Expression query; private final QueryBuilder queryBuilder; @@ -163,14 +165,13 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand return queryBuilder != null ? 
new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler); } + @Override public QueryBuilder queryBuilder() { return queryBuilder; } protected abstract Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler); - public abstract Expression replaceQueryBuilder(QueryBuilder queryBuilder); - @Override public BiConsumer postAnalysisPlanVerification() { return FullTextFunction::checkFullTextQueryFunctions; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index ef3828a3f2fbb..9267d039aaf91 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -13,6 +13,7 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -35,14 +36,15 @@ public final class QueryBuilderResolver { private QueryBuilderResolver() {} public static void resolveQueryBuilders(LogicalPlan plan, TransportActionServices services, ActionListener listener) { - var hasFullTextFunctions = plan.anyMatch(p -> { - Holder hasFullTextFunction = new Holder<>(false); - p.forEachExpression(FullTextFunction.class, unused -> hasFullTextFunction.set(true)); - return hasFullTextFunction.get(); + var hasRewriteableAwareFunctions = plan.anyMatch(p -> { + 
Holder hasRewriteable = new Holder<>(false); + p.forEachExpression(FullTextFunction.class, unused -> hasRewriteable.set(true)); + p.forEachExpression(ExtractSnippets.class, unused -> hasRewriteable.set(true)); + return hasRewriteable.get(); }); - if (hasFullTextFunctions) { + if (hasRewriteableAwareFunctions) { Rewriteable.rewriteAndFetch( - new FullTextFunctionsRewritable(plan), + new FunctionsRewritable(plan), queryRewriteContext(services, indexNames(plan)), listener.delegateFailureAndWrap((l, r) -> l.onResponse(r.plan)) ); @@ -70,9 +72,9 @@ private static Set indexNames(LogicalPlan plan) { return indexNames; } - private record FullTextFunctionsRewritable(LogicalPlan plan) implements Rewriteable { + private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable { @Override - public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { + public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { Holder exceptionHolder = new Holder<>(); Holder updated = new Holder<>(false); LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> { @@ -92,7 +94,7 @@ public FullTextFunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOExc if (exceptionHolder.get() != null) { throw exceptionHolder.get(); } - return updated.get() ? new FullTextFunctionsRewritable(newPlan) : this; + return updated.get() ? 
new FunctionsRewritable(newPlan) : this; } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index e8644e94334c1..a163417eebd4d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.string; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -15,9 +16,8 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -25,28 +25,27 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import 
org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; import java.util.List; +import java.util.Objects; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; /** * Extract snippets function, that extracts the most relevant snippets from a given input string */ -public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument { +// TODO: Does this also need to implement TranslationAware? +public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ExtractSnippets", @@ -58,6 +57,7 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum // TODO better names? private final Expression field, str, numSnippets, snippetLength; + private final QueryBuilder queryBuilder; @FunctionInfo( returnType = "keyword", @@ -81,21 +81,34 @@ public ExtractSnippets( type = { "integer" }, description = "The length of snippets to return. 
Defaults to " + DEFAULT_SNIPPET_LENGTH ) Expression snippetLength + ) { + this(source, field, str, numSnippets, snippetLength, new MatchQueryBuilder(field.sourceText(), str.sourceText())); + } + + public ExtractSnippets( + Source source, + Expression field, + Expression str, + Expression numSnippets, + Expression snippetLength, + QueryBuilder queryBuilder ) { super(source, List.of(field, str, numSnippets, snippetLength)); this.field = field; this.str = str; this.numSnippets = numSnippets; this.snippetLength = snippetLength; - } + this.queryBuilder = queryBuilder; + }; - private ExtractSnippets(StreamInput in) throws IOException { + public ExtractSnippets(StreamInput in) throws IOException { this( Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class), in.readOptionalNamedWriteable(Expression.class), - in.readOptionalNamedWriteable(Expression.class) + in.readOptionalNamedWriteable(Expression.class), + in.readOptionalNamedWriteable(QueryBuilder.class) ); } @@ -106,6 +119,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeNamedWriteable(str); out.writeOptionalNamedWriteable(numSnippets); out.writeOptionalNamedWriteable(snippetLength); + out.writeOptionalNamedWriteable(queryBuilder); } @Override @@ -181,16 +195,22 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { str.sourceText(), Integer.parseInt(numSnippets.sourceText()), Integer.parseInt(snippetLength.sourceText()), - queryBuilder() + queryBuilder ); - shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); + shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); } return new HighlighterExpressionEvaluator.Factory(shardConfigs); } - private QueryBuilder queryBuilder() { - return new MatchQueryBuilder(field.sourceText(), str.sourceText()); + @Override + public QueryBuilder 
queryBuilder() { + return queryBuilder; + } + + @Override + public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { + return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder); } Expression field() { @@ -208,4 +228,22 @@ Expression numSnippets() { Expression snippetLength() { return snippetLength; } + + @Override + public boolean equals(Object o) { + // Equality covers the four argument expressions plus the query builder, so two instances that differ only in their + // query builder are not considered equal + if (o == null || getClass() != o.getClass()) return false; + ExtractSnippets extractSnippets = (ExtractSnippets) o; + return Objects.equals(field(), extractSnippets.field()) + && Objects.equals(str(), extractSnippets.str()) + && Objects.equals(numSnippets(), extractSnippets.numSnippets()) + && Objects.equals(snippetLength(), extractSnippets.snippetLength()) + && Objects.equals(queryBuilder(), extractSnippets.queryBuilder()); + } + + @Override + public int hashCode() { + return Objects.hash(field(), str(), numSnippets(), snippetLength(), queryBuilder()); + } } From d68c2e873c12d569bd6e08508bd139373ac42401 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 4 Aug 2025 10:54:09 -0400 Subject: [PATCH 06/44] Add comments from session with Carlos --- .../fetch/subphase/highlight/HighlightPhase.java | 1 + .../xpack/esql/capabilities/RewriteableAware.java | 4 ++++ .../function/fulltext/QueryBuilderResolver.java | 1 + .../function/scalar/string/ExtractSnippets.java | 13 ++++++++++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index cf9e8fbf7ded0..bd0bddea0261d 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ 
b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -66,6 +66,7 @@ public void process(HitContext hitContext) throws IOException { Map> contextBuilders = fieldContext.builders; for (String field : contextBuilders.keySet()) { FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext); + // TODO create this in ES|QL when processing matches Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType); HighlightField highlightField = highlighter.highlight(fieldContext); if (highlightField != null) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index 4c4a34b60e46e..4cb1610e5945f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -9,6 +9,10 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; +import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; public interface RewriteableAware { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index 9267d039aaf91..dd47ceb96d8d6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -77,6 +77,7 @@ private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable exceptionHolder = new Holder<>(); Holder updated = new Holder<>(false); + // TODO this needs to work with any rewriteable aware not just full text function LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> { QueryBuilder builder = f.queryBuilder(), initial = builder; builder = builder == null diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index a163417eebd4d..58ea52ec4ab0d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -18,6 +18,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -28,7 +29,10 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; +import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import 
org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; import java.io.IOException; import java.util.List; @@ -44,7 +48,7 @@ /** * Extract snippets function, that extracts the most relevant snippets from a given input string */ -// TODO: Does this also need to implement TranslationAware? +// TODO: This also needs to implement TranslationAware? public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, @@ -188,8 +192,10 @@ protected NodeInfo info() { public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { List shardContexts = toEvaluator.shardContexts(); LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; + int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { + // TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders shardContext.addHighlightQuery( field.sourceText(), str.sourceText(), @@ -213,6 +219,11 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder); } + @Override + public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { + return queryBuilder != null ? 
new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler); + } + Expression field() { return field; } From 05711001ad67c3ac0ee5349034467b0b7ee4ce15 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 4 Aug 2025 15:01:31 -0400 Subject: [PATCH 07/44] Make translation aware and get further down the rewrite cycle (still doesn't completely work yet) --- .../HighlighterExpressionEvaluator.java | 2 - .../fulltext/QueryBuilderResolver.java | 40 +++++++++++-------- .../scalar/string/ExtractSnippets.java | 21 ++++++---- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index aeaff93ac0a53..40e5bc9cf6364 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -63,8 +63,6 @@ public Block eval(Page page) { public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) { - // TODO: Is it possible to add the highlight query here, rather than in ExtractSnippets? 
Would require ShardConfig having access - // to context return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index dd47ceb96d8d6..bfdf14fd3d7ff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -13,7 +13,9 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.xpack.esql.core.util.Holder; -import org.elasticsearch.xpack.esql.expression.function.scalar.string.ExtractSnippets; +import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -38,8 +40,11 @@ private QueryBuilderResolver() {} public static void resolveQueryBuilders(LogicalPlan plan, TransportActionServices services, ActionListener listener) { var hasRewriteableAwareFunctions = plan.anyMatch(p -> { Holder hasRewriteable = new Holder<>(false); - p.forEachExpression(FullTextFunction.class, unused -> hasRewriteable.set(true)); - p.forEachExpression(ExtractSnippets.class, unused -> hasRewriteable.set(true)); + p.forEachExpression(expr -> { + if (expr instanceof RewriteableAware) { + hasRewriteable.set(true); + } + }); return hasRewriteable.get(); }); if (hasRewriteableAwareFunctions) { @@ -77,20 +82,23 
@@ private record FunctionsRewritable(LogicalPlan plan) implements Rewriteable exceptionHolder = new Holder<>(); Holder updated = new Holder<>(false); - // TODO this needs to work with any rewriteable aware not just full text function - LogicalPlan newPlan = plan.transformExpressionsDown(FullTextFunction.class, f -> { - QueryBuilder builder = f.queryBuilder(), initial = builder; - builder = builder == null - ? f.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() - : builder; - try { - builder = builder.rewrite(ctx); - } catch (IOException e) { - exceptionHolder.setIfAbsent(e); + LogicalPlan newPlan = plan.transformExpressionsDown(Expression.class, expr -> { + Expression finalExpression = expr; + if (expr instanceof RewriteableAware rewriteableAware && expr instanceof TranslationAware translationAware) { + QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder; + builder = builder == null + ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() + : builder; + try { + builder = builder.rewrite(ctx); + } catch (IOException e) { + exceptionHolder.setIfAbsent(e); + } + var rewritten = builder != initial; + updated.set(updated.get() || rewritten); + finalExpression = rewritten ? rewriteableAware.replaceQueryBuilder(builder) : finalExpression; } - var rewritten = builder != initial; - updated.set(updated.get() || rewritten); - return rewritten ? 
f.replaceQueryBuilder(builder) : f; + return finalExpression; }); if (exceptionHolder.get() != null) { throw exceptionHolder.get(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 58ea52ec4ab0d..386be5e99d37c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -7,16 +7,17 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.string; -import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator; import org.elasticsearch.compute.lucene.LuceneQueryEvaluator; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.index.query.InterceptedQueryBuilderWrapper; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -26,7 +27,6 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.Param; -import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import 
org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -48,8 +48,7 @@ /** * Extract snippets function, that extracts the most relevant snippets from a given input string */ -// TODO: This also needs to implement TranslationAware? -public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware { +public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ExtractSnippets", @@ -196,6 +195,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { // TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders + shardContext.addHighlightQuery( field.sourceText(), str.sourceText(), @@ -206,7 +206,6 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); } return new HighlighterExpressionEvaluator.Factory(shardConfigs); - } @Override @@ -219,9 +218,17 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { return new ExtractSnippets(source(), field, str, numSnippets, snippetLength, queryBuilder); } + @Override + public Translatable translatable(LucenePushdownPredicates pushdownPredicates) { + return Translatable.YES; + } + @Override public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { - return queryBuilder != null ? 
new TranslationAwareExpressionQuery(source(), queryBuilder) : translate(pushdownPredicates, handler); + if (queryBuilder != null) { + return new TranslationAwareExpressionQuery(source(), queryBuilder); + } + throw new IllegalStateException("Missing queryBuilder"); } Expression field() { @@ -242,8 +249,6 @@ Expression snippetLength() { @Override public boolean equals(Object o) { - // Match does not serialize options, as they get included in the query builder. We need to override equals and hashcode to - // ignore options when comparing two Match functions if (o == null || getClass() != o.getClass()) return false; ExtractSnippets extractSnippets = (ExtractSnippets) o; return Objects.equals(field(), extractSnippets.field()) From 9fe765447a14ee794458c803bb2c97f1a6a77dc1 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 4 Aug 2025 16:15:24 -0400 Subject: [PATCH 08/44] Move building highlight query to extract snippets --- .../scalar/string/ExtractSnippets.java | 52 ++++++++++++---- .../planner/EsPhysicalOperationProviders.java | 60 ++++++------------- 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 386be5e99d37c..5426d364044fd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -13,9 +13,12 @@ import org.elasticsearch.compute.lucene.HighlighterExpressionEvaluator; import org.elasticsearch.compute.lucene.LuceneQueryEvaluator; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; -import org.elasticsearch.index.query.InterceptedQueryBuilderWrapper; import 
org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -194,15 +197,44 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { - // TODO we can probably create the highlighter here instead of in EsPhysicalOperationProviders - - shardContext.addHighlightQuery( - field.sourceText(), - str.sourceText(), - Integer.parseInt(numSnippets.sourceText()), - Integer.parseInt(snippetLength.sourceText()), - queryBuilder - ); + SearchExecutionContext searchExecutionContext = shardContext.searchExecutionContext(); + SearchContext searchContext = shardContext.searchContext(); + if (searchContext == null) { + throw new IllegalStateException("Missing search context, cannot extract snippets"); + } + + try { + // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch + HighlightBuilder highlightBuilder = new HighlightBuilder(); + if (queryBuilder != null) { + highlightBuilder.highlightQuery(queryBuilder); + } + // Stripping pre/post tags as they're not useful for snippet creation + highlightBuilder.field(field.sourceText()).preTags("").postTags(""); + // Return highest scoring fragments + highlightBuilder.order(HighlightBuilder.Order.SCORE); + highlightBuilder.numOfFragments(Integer.parseInt(numSnippets.sourceText())); + highlightBuilder.fragmentSize(Integer.parseInt(snippetLength.sourceText())); + 
highlightBuilder.noMatchSize(Integer.parseInt(snippetLength.sourceText())); + + SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext); + searchContext.highlight(highlightContext); + + } catch (IOException e) { + throw new RuntimeException( + "Failed to create highlight context for field [" + + field.sourceText() + + "], str [" + + str.sourceText() + + "], numSnippets: [" + + Integer.parseInt(numSnippets.sourceText()) + + "], snippetLength: [" + + Integer.parseInt(snippetLength.sourceText()) + + "]", + e + ); + } + shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); } return new HighlighterExpressionEvaluator.Factory(shardConfigs); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index e37670f6bd3fc..9e7549a183a3a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -52,9 +52,8 @@ import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.search.fetch.StoredFieldsSpec; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; -import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.AliasFilter; +import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.search.sort.SortBuilder; @@ -104,6 +103,10 @@ protected void closeInternal() { } }; + public abstract SearchExecutionContext searchExecutionContext(); + + public abstract SearchContext searchContext(); + @Override 
public void incRef() { refCounted.incRef(); @@ -138,8 +141,6 @@ public boolean hasReferences() { * need one in ten documents. */ public abstract double storedFieldsSequentialProportion(); - - public abstract void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder); } private final List shardContexts; @@ -391,6 +392,19 @@ public DefaultShardContext(int index, Releasable releasable, SearchExecutionCont this.shardIdentifier = this.ctx.getFullyQualifiedIndex().getName() + ":" + this.ctx.getShardId(); } + @Override + public SearchExecutionContext searchExecutionContext() { + return ctx; + } + + @Override + public SearchContext searchContext() { + if (releasable instanceof org.elasticsearch.search.internal.SearchContext searchContext) { + return searchContext; + } + return null; + } + @Override public int index() { return index; @@ -502,44 +516,6 @@ public double storedFieldsSequentialProportion() { return EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.get(ctx.getIndexSettings().getSettings()); } - @Override - public void addHighlightQuery(String field, String str, int numSnippets, int snippetLength, QueryBuilder queryBuilder) { - try { - // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch - HighlightBuilder highlightBuilder = new HighlightBuilder(); - if (queryBuilder != null) { - highlightBuilder.highlightQuery(queryBuilder); - } - // Stripping pre/post tags as they're not useful for snippet creation - highlightBuilder.field(field).preTags("").postTags(""); - // Return highest scoring fragments - highlightBuilder.order(HighlightBuilder.Order.SCORE); - highlightBuilder.numOfFragments(numSnippets); - highlightBuilder.fragmentSize(snippetLength); - highlightBuilder.noMatchSize(snippetLength); - - SearchHighlightContext highlightContext = highlightBuilder.build(ctx); - - // Update the active SearchContext with the highlight context - if (releasable 
instanceof org.elasticsearch.search.internal.SearchContext searchContext) { - searchContext.highlight(highlightContext); - } - } catch (IOException e) { - throw new RuntimeException( - "Failed to create highlight context for field [" - + field - + "], str [" - + str - + "], numSnippets: [" - + numSnippets - + "], snippetLength: [" - + snippetLength - + "]", - e - ); - } - } - @Override public void close() { releasable.close(); From 8adea5654ae9238707e45eaeac9bbdf67e46a95d Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 4 Aug 2025 18:05:00 +0200 Subject: [PATCH 09/44] Cherry-pick: Initial incomplete work for creating the Highlighter in the expression evaluator --- .../highlight/SearchHighlightContext.java | 10 +- .../HighlighterExpressionEvaluator.java | 95 +++++++++++++++++-- .../compute/lucene/LuceneQueryEvaluator.java | 30 ++++-- .../LuceneQueryExpressionEvaluator.java | 5 +- .../lucene/LuceneQueryScoreEvaluator.java | 4 +- .../LuceneQueryExpressionEvaluatorTests.java | 2 +- .../LuceneQueryScoreEvaluatorTests.java | 2 +- 7 files changed, 123 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java index 631a75a355abf..c04b52d747132 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java @@ -40,7 +40,7 @@ public static class Field { private final String field; private final FieldOptions fieldOptions; - Field(String field, FieldOptions fieldOptions) { + public Field(String field, FieldOptions fieldOptions) { assert field != null; assert fieldOptions != null; this.field = field; @@ -185,16 +185,16 @@ public Map options() { return options; } - static class Builder { + public static class Builder { private final 
FieldOptions fieldOptions = new FieldOptions(); - Builder fragmentCharSize(int fragmentCharSize) { + public Builder fragmentCharSize(int fragmentCharSize) { fieldOptions.fragmentCharSize = fragmentCharSize; return this; } - Builder numberOfFragments(int numberOfFragments) { + public Builder numberOfFragments(int numberOfFragments) { fieldOptions.numberOfFragments = numberOfFragments; return this; } @@ -294,7 +294,7 @@ Builder options(Map options) { return this; } - FieldOptions build() { + public FieldOptions build() { return fieldOptions; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 40e5bc9cf6364..d7e406b7ab75a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -7,6 +7,8 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.BytesRef; @@ -17,15 +19,46 @@ import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; +import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.fetch.FetchContext; +import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; +import 
org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.lookup.Source; import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collections; +import java.util.Map; +import java.util.function.Supplier; public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator { - HighlighterExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shardConfigs) { + private final String fieldName; + private final SearchContext searchContext; + private final SourceLoader sourceLoader; + + HighlighterExpressionEvaluator( + BlockFactory blockFactory, + ShardConfig[] shardConfigs, + String fieldName, + SearchContext searchContext, + SourceLoader sourceLoader + ) { super(blockFactory, shardConfigs); + this.fieldName = fieldName; + this.searchContext = searchContext; + this.sourceLoader = sourceLoader; } @Override @@ -44,15 +77,62 @@ protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, } @Override - protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer) throws IOException { - // TODO: add snippets here - builder.appendBytesRef(new BytesRef("I am a snippet")); // Placeholder for actual highlighted text + protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { + + // I was trying to find the way to build the highligher from the context, but probably we should just build the + // CustomUnifiedHighligher directly so we don't need specific fetch phase classes for this + SearchHighlightContext.FieldOptions.Builder optionsBuilder = 
new SearchHighlightContext.FieldOptions.Builder(); + optionsBuilder.numberOfFragments(10); + optionsBuilder.fragmentCharSize(100); + SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build()); + FetchContext fetchContext = new FetchContext(searchContext, sourceLoader); + MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName); + SearchHit searchHit = new SearchHit(docId); + Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); + + + FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext( + searchHit, + leafReaderContext, + docId, + Map.of(), + source, + null + ); + FieldHighlightContext highlightContext = new FieldHighlightContext( + fieldName, + field, + fieldType, + fetchContext, + hitContext, + query, + Map.of() + ); + Highlighter highlighter = new DefaultHighlighter(); + HighlightField highlight = highlighter.highlight(highlightContext); + + // Iterate over fragments etc + builder.appendBytesRef(new BytesRef(highlight.fragments()[0].bytes().bytes())); + } + + private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) { + return () -> { + StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet()); + try { + LeafStoredFieldLoader leafRootLoader = rootLoader.getLoader(ctx, null); + leafRootLoader.advanceTo(doc); + return Source.fromBytes(leafRootLoader.source()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; } @Override protected void appendNoMatch(BytesRefVector.Builder builder) { - // NOTE: Carlos originally suggested that we add null here, but that doesn't work - errors on missing key - builder.appendBytesRef(new BytesRef()); + + } @Override @@ -63,7 +143,8 @@ public Block eval(Page page) { public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) 
{ - return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs); + // We need to get field name, search context, and source loader. We should be able to remove the source loader by getting the field value + return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs, fieldName, searchContext, context.sourceLoader()); } } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index d91df60621fce..e6eae5109f264 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -17,7 +17,6 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; -import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.DocBlock; @@ -49,7 +48,7 @@ public abstract class LuceneQueryEvaluator implements public record ShardConfig(Query query, IndexSearcher searcher) {} private final BlockFactory blockFactory; - private final ShardConfig[] shards; + protected final ShardConfig[] shards; private final List perShardState; @@ -266,8 +265,11 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { min, max, scoreBuilder, + ctx, LuceneQueryEvaluator.this::appendNoMatch, - LuceneQueryEvaluator.this::appendMatch + (builder, scorer1, docId, ctc, query) -> + LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query), + weight.getQuery() ) ) { bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1); @@ -308,12 +310,12 @@ private void initScorer(int minDocId) throws IOException { private void 
scoreSingleDocWithScorer(T builder, int doc) throws IOException { if (scorer.iterator().docID() == doc) { - appendMatch(builder, scorer); + appendMatch(builder, scorer, doc, ctx, weight.getQuery()); } else if (scorer.iterator().docID() > doc) { appendNoMatch(builder); } else { if (scorer.iterator().advance(doc) == doc) { - appendMatch(builder, scorer); + appendMatch(builder, scorer, doc, ctx, weight.getQuery()); } else { appendNoMatch(builder); } @@ -321,6 +323,11 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException { } } + @FunctionalInterface + public interface MatchAppender { + void accept(T t, U u, int docId, LeafReaderContext leafReaderContext, Query query) throws E; + } + /** * Collects matching information for dense range of doc ids. This assumes that * doc ids are sent to {@link LeafCollector#collect(int)} in ascending order @@ -329,8 +336,10 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException { static class DenseCollector implements LeafCollector, Releasable { private final U scoreBuilder; private final int max; + private final LeafReaderContext leafReaderContext; private final Consumer appendNoMatch; - private final CheckedBiConsumer appendMatch; + private final MatchAppender appendMatch; + private final Query query; private Scorable scorer; int next; @@ -339,14 +348,17 @@ static class DenseCollector implements LeafCollector, int min, int max, U scoreBuilder, + LeafReaderContext leafReaderContext, Consumer appendNoMatch, - CheckedBiConsumer appendMatch + MatchAppender appendMatch, Query query ) { this.scoreBuilder = scoreBuilder; this.max = max; next = min; + this.leafReaderContext = leafReaderContext; this.appendNoMatch = appendNoMatch; this.appendMatch = appendMatch; + this.query = query; } @Override @@ -359,7 +371,7 @@ public void collect(int doc) throws IOException { while (next++ < doc) { appendNoMatch.accept(scoreBuilder); } - appendMatch.accept(scoreBuilder, scorer); + 
appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query); } public Vector build() { @@ -397,7 +409,7 @@ public void close() { /** * Appends a matching result to a builder created by @link createVectorBuilder} */ - protected abstract void appendMatch(T builder, Scorable scorer) throws IOException; + protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException; /** * Appends a non matching result to a builder created by @link createVectorBuilder} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java index 73eae67819907..9c65d89782b93 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; @@ -17,6 +18,7 @@ import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.index.mapper.IdLoader; import java.io.IOException; @@ -60,7 +62,8 @@ protected void appendNoMatch(BooleanVector.Builder builder) { } @Override - protected void appendMatch(BooleanVector.Builder builder, Scorable scorer) throws IOException { + protected void appendMatch(BooleanVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { builder.appendBoolean(true); } diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java index cc7b9d9878fa1..1b422b4443e5d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java @@ -7,6 +7,7 @@ package org.elasticsearch.compute.lucene; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; @@ -61,7 +62,8 @@ protected void appendNoMatch(DoubleVector.Builder builder) { } @Override - protected void appendMatch(DoubleVector.Builder builder, Scorable scorer) throws IOException { + protected void appendMatch(DoubleVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException { builder.appendDouble(scorer.score()); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java index ba9e62abb8b35..9a0016f60ba11 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java @@ -28,7 +28,7 @@ protected DenseCollector createDenseCollector(int min, in max, blockFactory().newBooleanVectorFixedBuilder(max - min + 1), b -> b.appendBoolean(false), - (b, s) -> b.appendBoolean(true) + (b, s, d) -> b.appendBoolean(true) ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java index 53fa3f775386c..c1797f2e22724 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java @@ -32,7 +32,7 @@ protected LuceneQueryEvaluator.DenseCollector createDenseC max, blockFactory().newDoubleVectorFixedBuilder(max - min + 1), b -> b.appendDouble(NO_MATCH_SCORE), - (b, s) -> b.appendDouble(s.score()) + (b, s, d) -> b.appendDouble(s.score()) ); } From 6be55b470513662bad1b7314dd1e8611b943a95f Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 6 Aug 2025 11:17:06 -0400 Subject: [PATCH 10/44] Hack in highlighter so it actually produces a response --- .../highlight/DefaultHighlighter.java | 10 ++- .../highlight/SearchHighlightContext.java | 8 +-- .../compute/src/main/java/module-info.java | 1 + .../HighlighterExpressionEvaluator.java | 69 ++++++++++++------- .../scalar/string/ExtractSnippets.java | 7 +- 5 files changed, 61 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 3efbcd15140e5..927ad0f5ad434 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -60,9 +60,13 @@ public boolean canHighlight(MappedFieldType fieldType) { @Override public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { @SuppressWarnings("unchecked") - Map cache = (Map) fieldContext.cache.computeIfAbsent( + // Map cache = (Map) fieldContext.cache.computeIfAbsent( + // UnifiedHighlighter.class.getName(), + // k -> new 
HashMap<>() + // ); + Map cache = (Map) fieldContext.cache.getOrDefault( UnifiedHighlighter.class.getName(), - k -> new HashMap<>() + new HashMap<>() ); if (cache.containsKey(fieldContext.fieldName) == false) { cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); @@ -114,7 +118,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings(); - Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") + Encoder encoder = "html".equals(fieldContext.field.fieldOptions().encoder()) ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java index c04b52d747132..f06c667a073ba 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java @@ -209,17 +209,17 @@ Builder encoder(String encoder) { return this; } - Builder preTags(String[] preTags) { + public Builder preTags(String[] preTags) { fieldOptions.preTags = preTags; return this; } - Builder postTags(String[] postTags) { + public Builder postTags(String[] postTags) { fieldOptions.postTags = postTags; return this; } - Builder scoreOrdered(boolean scoreOrdered) { + public Builder scoreOrdered(boolean scoreOrdered) { fieldOptions.scoreOrdered = scoreOrdered; return this; } @@ -229,7 +229,7 @@ Builder highlightFilter(boolean highlightFilter) { return this; } - Builder requireFieldMatch(boolean requireFieldMatch) { + public Builder requireFieldMatch(boolean requireFieldMatch) { fieldOptions.requireFieldMatch = 
requireFieldMatch; return this; } diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java index f21ed72d7eb21..5504e48d74636 100644 --- a/x-pack/plugin/esql/compute/src/main/java/module-info.java +++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java @@ -21,6 +21,7 @@ requires org.elasticsearch.geo; requires org.elasticsearch.xcore; requires hppc; + requires org.apache.lucene.highlighter; exports org.elasticsearch.compute; exports org.elasticsearch.compute.aggregation; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index d7e406b7ab75a..a15e91709fe4b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -28,15 +28,18 @@ import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.xcontent.Text; import java.io.IOException; import java.io.UncheckedIOException; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.function.Supplier; @@ -45,20 +48,23 @@ public class HighlighterExpressionEvaluator extends 
LuceneQueryEvaluator() ); Highlighter highlighter = new DefaultHighlighter(); HighlightField highlight = highlighter.highlight(highlightContext); - // Iterate over fragments etc - builder.appendBytesRef(new BytesRef(highlight.fragments()[0].bytes().bytes())); + // TODO: Even when I have 2 fragments coming back, it's only ever returning the first bytes ref vector. Is this the appropriate data + // structure? + for (Text highlightText : highlight.fragments()) { + builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes())); + } } private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) { @@ -131,8 +137,7 @@ private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, in @Override protected void appendNoMatch(BytesRefVector.Builder builder) { - - + // builder.appendBytesRef(new BytesRef()); } @Override @@ -140,11 +145,23 @@ public Block eval(Page page) { return executeQuery(page); } - public record Factory(ShardConfig[] shardConfigs) implements EvalOperator.ExpressionEvaluator.Factory { + public record Factory( + ShardConfig[] shardConfigs, + String fieldName, + Integer numFragments, + Integer fragmentSize, + SearchContext searchContext + ) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) { - // We need to get field name, search context, and source loader. 
We should be able to remove the source loader by getting the field value - return new HighlighterExpressionEvaluator(context.blockFactory(), shardConfigs, fieldName, searchContext, context.sourceLoader()); + return new HighlighterExpressionEvaluator( + context.blockFactory(), + shardConfigs, + fieldName, + numFragments, + fragmentSize, + searchContext + ); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 5426d364044fd..6a2018c61d335 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -237,7 +237,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); } - return new HighlighterExpressionEvaluator.Factory(shardConfigs); + // Get field name and search context from the first shard context + String fieldNameStr = field.sourceText(); + int numFragments = numSnippets == null ? DEFAULT_NUM_SNIPPETS : Integer.parseInt(numSnippets.sourceText()); + int fragmentSize = snippetLength == null ? DEFAULT_SNIPPET_LENGTH : Integer.parseInt(snippetLength.sourceText()); + SearchContext firstSearchContext = shardContexts.isEmpty() ? 
null : shardContexts.get(0).searchContext(); + return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numFragments, fragmentSize, firstSearchContext); } @Override From 60e3ce6cb3da4c5f2f75389430b804f4a5200659 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 7 Aug 2025 20:23:50 +0000 Subject: [PATCH 11/44] [CI] Auto commit changes from spotless --- .../compute/lucene/LuceneQueryEvaluator.java | 9 +++++---- .../compute/lucene/LuceneQueryExpressionEvaluator.java | 1 - .../xpack/esql/plugin/ExtractSnippetsIT.java | 2 -- .../xpack/esql/capabilities/RewriteableAware.java | 4 ---- .../function/fulltext/QueryBuilderResolver.java | 2 +- 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index e6eae5109f264..13eb580476cd9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -267,8 +267,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { scoreBuilder, ctx, LuceneQueryEvaluator.this::appendNoMatch, - (builder, scorer1, docId, ctc, query) -> - LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query), + (builder, scorer1, docId, ctc, query) -> LuceneQueryEvaluator.this.appendMatch(builder, scorer1, docId, ctx, query), weight.getQuery() ) ) { @@ -350,7 +349,8 @@ static class DenseCollector implements LeafCollector, U scoreBuilder, LeafReaderContext leafReaderContext, Consumer appendNoMatch, - MatchAppender appendMatch, Query query + MatchAppender appendMatch, + Query query ) { this.scoreBuilder = scoreBuilder; this.max = max; @@ -409,7 +409,8 @@ public void close() { /** * Appends a matching result to a builder created by 
@link createVectorBuilder} */ - protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException; + protected abstract void appendMatch(T builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + throws IOException; /** * Appends a non matching result to a builder created by @link createVectorBuilder} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java index 9c65d89782b93..e81cbe3183a9b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java @@ -18,7 +18,6 @@ import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.index.mapper.IdLoader; import java.io.IOException; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java index 4b5affa2f6fc3..aea176b48e000 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java @@ -41,8 +41,6 @@ public void testExtractSnippets() { } } - - static void createAndPopulateIndex(Consumer ensureYellow) { var indexName = "test"; var client = client().admin().indices(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index 4cb1610e5945f..4c4a34b60e46e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -9,10 +9,6 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.querydsl.query.Query; -import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; -import org.elasticsearch.xpack.esql.planner.TranslatorHandler; -import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; public interface RewriteableAware { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index bfdf14fd3d7ff..159d179e9fd7b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -12,10 +12,10 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; -import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; 
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; From b6fb4f3383fc32f69c5d7ab34907b990de40373e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 11 Aug 2025 12:20:39 +0200 Subject: [PATCH 12/44] Change LuceneQueryEvaluator to use Blocks instead of Vectors to make it multivalue aware --- .../HighlighterExpressionEvaluator.java | 28 ++++++++------ .../compute/lucene/LuceneQueryEvaluator.java | 38 +++++++++---------- .../LuceneQueryExpressionEvaluator.java | 16 ++++---- .../lucene/LuceneQueryScoreEvaluator.java | 15 ++++---- .../lucene/LuceneQueryEvaluatorTests.java | 9 ++--- .../LuceneQueryExpressionEvaluatorTests.java | 17 +++++---- .../LuceneQueryScoreEvaluatorTests.java | 16 ++++---- 7 files changed, 73 insertions(+), 66 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index a15e91709fe4b..e7112249d89b6 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -14,9 +14,8 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; @@ -43,7 +42,7 @@ import java.util.Map; import java.util.function.Supplier; -public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator +public class HighlighterExpressionEvaluator 
extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator { @@ -73,17 +72,17 @@ protected ScoreMode scoreMode() { } @Override - protected Vector createNoMatchVector(BlockFactory blockFactory, int size) { - return blockFactory.newConstantBytesRefVector(new BytesRef(), size); + protected Block createNoMatchBlock(BlockFactory blockFactory, int size) { + return blockFactory.newConstantNullBlock(size); } @Override - protected BytesRefVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) { - return blockFactory.newBytesRefVectorBuilder(size * numFragments); + protected BytesRefBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) { + return blockFactory.newBytesRefBlockBuilder(size * numFragments); } @Override - protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException { // TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes? @@ -115,11 +114,16 @@ protected void appendMatch(BytesRefVector.Builder builder, Scorable scorer, int Highlighter highlighter = new DefaultHighlighter(); HighlightField highlight = highlighter.highlight(highlightContext); - // TODO: Even when I have 2 fragments coming back, it's only ever returning the first bytes ref vector. Is this the appropriate data - // structure? 
+ boolean multivalued = highlight.fragments().length > 1; + if (multivalued) { + builder.beginPositionEntry(); + } for (Text highlightText : highlight.fragments()) { builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes())); } + if (multivalued) { + builder.endPositionEntry(); + } } private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) { @@ -136,8 +140,8 @@ private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, in } @Override - protected void appendNoMatch(BytesRefVector.Builder builder) { - // builder.appendBytesRef(new BytesRef()); + protected void appendNoMatch(BytesRefBlock.Builder builder) { + builder.appendNull(); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index 13eb580476cd9..ad05d27b8f42d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -23,7 +23,6 @@ import org.elasticsearch.compute.data.DocVector; import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.data.Vector; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -43,7 +42,7 @@ * It's much faster to push queries to the {@link LuceneSourceOperator} or the like, but sometimes this isn't possible. So * this class is here to save the day. 
*/ -public abstract class LuceneQueryEvaluator implements Releasable { +public abstract class LuceneQueryEvaluator implements Releasable { public record ShardConfig(Query query, IndexSearcher searcher) {} @@ -66,9 +65,9 @@ public Block executeQuery(Page page) { DocVector docs = (DocVector) block.asVector(); try { if (docs.singleSegmentNonDecreasing()) { - return evalSingleSegmentNonDecreasing(docs).asBlock(); + return evalSingleSegmentNonDecreasing(docs); } else { - return evalSlow(docs).asBlock(); + return evalSlow(docs); } } catch (IOException e) { throw new UncheckedIOException(e); @@ -105,15 +104,15 @@ public Block executeQuery(Page page) { * common. *

*/ - private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException { + private Block evalSingleSegmentNonDecreasing(DocVector docs) throws IOException { ShardState shardState = shardState(docs.shards().getInt(0)); SegmentState segmentState = shardState.segmentState(docs.segments().getInt(0)); int min = docs.docs().getInt(0); int max = docs.docs().getInt(docs.getPositionCount() - 1); int length = max - min + 1; - try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) { + try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) { if (length == docs.getPositionCount() && length > 1) { - return segmentState.scoreDense(scoreBuilder, min, max); + return segmentState.scoreDense(scoreBuilder, min, max, docs.getPositionCount()); } return segmentState.scoreSparse(scoreBuilder, docs.docs()); } @@ -133,13 +132,13 @@ private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException * the order that the {@link DocVector} came in. *

*/ - private Vector evalSlow(DocVector docs) throws IOException { + private Block evalSlow(DocVector docs) throws IOException { int[] map = docs.shardSegmentDocMapForwards(); // Clear any state flags from the previous run int prevShard = -1; int prevSegment = -1; SegmentState segmentState = null; - try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) { + try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) { for (int i = 0; i < docs.getPositionCount(); i++) { int shard = docs.shards().getInt(docs.shards().getInt(map[i])); int segment = docs.segments().getInt(map[i]); @@ -155,7 +154,7 @@ private Vector evalSlow(DocVector docs) throws IOException { segmentState.scoreSingleDocWithScorer(scoreBuilder, docs.docs().getInt(map[i])); } } - try (Vector outOfOrder = scoreBuilder.build()) { + try (Block outOfOrder = scoreBuilder.build()) { return outOfOrder.filter(docs.shardSegmentDocMapBackwards()); } } @@ -246,9 +245,9 @@ private SegmentState(Weight weight, LeafReaderContext ctx) { * Score a range using the {@link BulkScorer}. This should be faster * than using {@link #scoreSparse} for dense doc ids. 
*/ - Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { + Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOException { if (noMatch) { - return createNoMatchVector(blockFactory, max - min + 1); + return createNoMatchBlock(blockFactory, max - min + 1); } if (bulkScorer == null || // The bulkScorer wasn't initialized Thread.currentThread() != bulkScorerThread // The bulkScorer was initialized on a different thread @@ -257,7 +256,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { bulkScorer = weight.bulkScorer(ctx); if (bulkScorer == null) { noMatch = true; - return createNoMatchVector(blockFactory, max - min + 1); + return createNoMatchBlock(blockFactory, positionCount); } } try ( @@ -272,6 +271,7 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { ) ) { bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1); + collector.finish(); return collector.build(); } } @@ -280,10 +280,10 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException { * Score a vector of doc ids using {@link Scorer}. If you have a dense range of * doc ids it'd be faster to use {@link #scoreDense}. */ - Vector scoreSparse(T scoreBuilder, IntVector docs) throws IOException { + Block scoreSparse(T scoreBuilder, IntVector docs) throws IOException { initScorer(docs.getInt(0)); if (noMatch) { - return createNoMatchVector(blockFactory, docs.getPositionCount()); + return createNoMatchBlock(blockFactory, docs.getPositionCount()); } for (int i = 0; i < docs.getPositionCount(); i++) { scoreSingleDocWithScorer(scoreBuilder, docs.getInt(i)); @@ -332,7 +332,7 @@ public interface MatchAppender { * doc ids are sent to {@link LeafCollector#collect(int)} in ascending order * which isn't documented, but @jpountz swears is true. 
*/ - static class DenseCollector implements LeafCollector, Releasable { + static class DenseCollector implements LeafCollector, Releasable { private final U scoreBuilder; private final int max; private final LeafReaderContext leafReaderContext; @@ -374,7 +374,7 @@ public void collect(int doc) throws IOException { appendMatch.accept(scoreBuilder, scorer, doc, leafReaderContext, query); } - public Vector build() { + public Block build() { return scoreBuilder.build(); } @@ -399,12 +399,12 @@ public void close() { /** * Creates a vector where all positions correspond to elements that don't match the query */ - protected abstract Vector createNoMatchVector(BlockFactory blockFactory, int size); + protected abstract Block createNoMatchBlock(BlockFactory blockFactory, int size); /** * Creates the corresponding vector builder to store the results of evaluating the query */ - protected abstract T createVectorBuilder(BlockFactory blockFactory, int size); + protected abstract T createBlockBuilder(BlockFactory blockFactory, int size); /** * Appends a matching result to a builder created by @link createVectorBuilder} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java index e81cbe3183a9b..c08f9b8822925 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java @@ -13,9 +13,9 @@ import org.apache.lucene.search.ScoreMode; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BooleanVector; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.data.Vector; import 
org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; @@ -27,7 +27,7 @@ * a {@link BooleanVector}. * @see LuceneQueryScoreEvaluator */ -public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator +public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator { @@ -46,22 +46,22 @@ protected ScoreMode scoreMode() { } @Override - protected Vector createNoMatchVector(BlockFactory blockFactory, int size) { - return blockFactory.newConstantBooleanVector(false, size); + protected Block createNoMatchBlock(BlockFactory blockFactory, int size) { + return blockFactory.newConstantBooleanBlockWith(false, size); } @Override - protected BooleanVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) { - return blockFactory.newBooleanVectorFixedBuilder(size); + protected BooleanBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) { + return blockFactory.newBooleanBlockBuilder(size); } @Override - protected void appendNoMatch(BooleanVector.Builder builder) { + protected void appendNoMatch(BooleanBlock.Builder builder) { builder.appendBoolean(false); } @Override - protected void appendMatch(BooleanVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException { builder.appendBoolean(true); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java index 1b422b4443e5d..88b5721a6fdf9 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java @@ -15,7 +15,6 @@ import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.DoubleVector; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.ScoreOperator; @@ -28,7 +27,7 @@ * Elements that don't match will have a score of {@link #NO_MATCH_SCORE}. * @see LuceneQueryScoreEvaluator */ -public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator implements ScoreOperator.ExpressionScorer { +public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator implements ScoreOperator.ExpressionScorer { public static final double NO_MATCH_SCORE = 0.0; @@ -47,22 +46,22 @@ protected ScoreMode scoreMode() { } @Override - protected Vector createNoMatchVector(BlockFactory blockFactory, int size) { - return blockFactory.newConstantDoubleVector(NO_MATCH_SCORE, size); + protected DoubleBlock createNoMatchBlock(BlockFactory blockFactory, int size) { + return blockFactory.newConstantDoubleBlockWith(NO_MATCH_SCORE, size); } @Override - protected DoubleVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) { - return blockFactory.newDoubleVectorFixedBuilder(size); + protected DoubleBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) { + return blockFactory.newDoubleBlockBuilder(size); } @Override - protected void appendNoMatch(DoubleVector.Builder builder) { + protected void appendNoMatch(DoubleBlock.Builder builder) { builder.appendDouble(NO_MATCH_SCORE); } @Override - protected void appendMatch(DoubleVector.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) + protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query) throws IOException { 
builder.appendDouble(scorer.score()); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java index 2ef64623daa74..cc7d25342b6ee 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluatorTests.java @@ -25,14 +25,13 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.compute.OperatorTests; +import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.DocBlock; import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.data.Vector; import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator; import org.elasticsearch.compute.operator.Driver; import org.elasticsearch.compute.operator.DriverContext; @@ -59,7 +58,7 @@ /** * Base class for testing Lucene query evaluators. 
*/ -public abstract class LuceneQueryEvaluatorTests extends ComputeTestCase { +public abstract class LuceneQueryEvaluatorTests extends ComputeTestCase { private static final String FIELD = "g"; @@ -168,9 +167,9 @@ protected void assertTermsQuery(List results, Set matching, int ex int matchCount = 0; for (Page page : results) { int initialBlockIndex = termsBlockIndex(page); - BytesRefVector terms = page.getBlock(initialBlockIndex).asVector(); + BytesRefBlock terms = page.getBlock(initialBlockIndex); @SuppressWarnings("unchecked") - T resultVector = (T) page.getBlock(resultsBlockIndex(page)).asVector(); + T resultVector = (T) page.getBlock(resultsBlockIndex(page)); for (int i = 0; i < page.getPositionCount(); i++) { BytesRef termAtPosition = terms.getBytesRef(i, new BytesRef()); boolean isMatch = matching.contains(termAtPosition.utf8ToString()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java index 9a0016f60ba11..f6808962fcdf6 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java @@ -8,8 +8,9 @@ package org.elasticsearch.compute.lucene; import org.apache.lucene.search.Scorable; +import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BooleanVector; +import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.lucene.LuceneQueryEvaluator.DenseCollector; import org.elasticsearch.compute.operator.EvalOperator; @@ -17,18 +18,20 @@ import static org.hamcrest.Matchers.equalTo; -public class LuceneQueryExpressionEvaluatorTests extends 
LuceneQueryEvaluatorTests { +public class LuceneQueryExpressionEvaluatorTests extends LuceneQueryEvaluatorTests { private final boolean useScoring = randomBoolean(); @Override - protected DenseCollector createDenseCollector(int min, int max) { + protected DenseCollector createDenseCollector(int min, int max) { return new LuceneQueryEvaluator.DenseCollector<>( min, max, - blockFactory().newBooleanVectorFixedBuilder(max - min + 1), + blockFactory().newBooleanBlockBuilder(max - min + 1), + null, b -> b.appendBoolean(false), - (b, s, d) -> b.appendBoolean(true) + (b, s, d, lr, q) -> b.appendBoolean(true), + null ); } @@ -54,12 +57,12 @@ protected int resultsBlockIndex(Page page) { } @Override - protected void assertCollectedResultMatch(BooleanVector resultVector, int position, boolean isMatch) { + protected void assertCollectedResultMatch(BooleanBlock resultVector, int position, boolean isMatch) { assertThat(resultVector.getBoolean(position), equalTo(isMatch)); } @Override - protected void assertTermResultMatch(BooleanVector resultVector, int position, boolean isMatch) { + protected void assertTermResultMatch(BooleanBlock resultVector, int position, boolean isMatch) { assertThat(resultVector.getBoolean(position), equalTo(isMatch)); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java index c1797f2e22724..af162db91978f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluatorTests.java @@ -9,7 +9,7 @@ import org.apache.lucene.search.Scorable; import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.DoubleVector; +import org.elasticsearch.compute.data.DoubleBlock; import 
org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.Operator; import org.elasticsearch.compute.operator.ScoreOperator; @@ -20,19 +20,21 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -public class LuceneQueryScoreEvaluatorTests extends LuceneQueryEvaluatorTests { +public class LuceneQueryScoreEvaluatorTests extends LuceneQueryEvaluatorTests { private static final float TEST_SCORE = 1.5f; private static final Double DEFAULT_SCORE = 1.0; @Override - protected LuceneQueryEvaluator.DenseCollector createDenseCollector(int min, int max) { + protected LuceneQueryEvaluator.DenseCollector createDenseCollector(int min, int max) { return new LuceneQueryEvaluator.DenseCollector<>( min, max, - blockFactory().newDoubleVectorFixedBuilder(max - min + 1), + blockFactory().newDoubleBlockBuilder(max - min + 1), + null, b -> b.appendDouble(NO_MATCH_SCORE), - (b, s, d) -> b.appendDouble(s.score()) + (b, s, d, lr, q) -> b.appendDouble(s.score()), + null ); } @@ -63,7 +65,7 @@ protected int resultsBlockIndex(Page page) { } @Override - protected void assertCollectedResultMatch(DoubleVector resultVector, int position, boolean isMatch) { + protected void assertCollectedResultMatch(DoubleBlock resultVector, int position, boolean isMatch) { if (isMatch) { assertThat(resultVector.getDouble(position), equalTo((double) TEST_SCORE)); } else { @@ -73,7 +75,7 @@ protected void assertCollectedResultMatch(DoubleVector resultVector, int positio } @Override - protected void assertTermResultMatch(DoubleVector resultVector, int position, boolean isMatch) { + protected void assertTermResultMatch(DoubleBlock resultVector, int position, boolean isMatch) { if (isMatch) { assertThat(resultVector.getDouble(position), greaterThan(DEFAULT_SCORE)); } else { From f6a807986c080b7ed02efb9e713c9ee21f2a9846 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 11 Aug 2025 14:00:20 +0200 Subject: [PATCH 13/44] Add rewritability --- 
.../esql/capabilities/RewriteableAware.java | 2 +- .../scalar/string/ExtractSnippets.java | 28 +++++++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index 4c4a34b60e46e..d4142833298c7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -10,7 +10,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; -public interface RewriteableAware { +public interface RewriteableAware extends TranslationAware{ QueryBuilder queryBuilder(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 6a2018c61d335..8116514188a53 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -22,10 +22,13 @@ import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.Check; +import 
org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; @@ -35,10 +38,12 @@ import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import org.elasticsearch.xpack.esql.querydsl.query.MatchQuery; import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.Objects; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; @@ -47,11 +52,13 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.fieldAsFieldAttribute; +import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.getNameFromFieldAttribute; /** * Extract snippets function, that extracts the most relevant snippets from a given input string */ -public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware { +public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware, EvaluatorMapper { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ExtractSnippets", @@ -257,15 +264,24 @@ public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { @Override public Translatable 
translatable(LucenePushdownPredicates pushdownPredicates) { - return Translatable.YES; + // We don't want pushdown for this function, as it is not a filter query + return Translatable.NO; } @Override public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { - if (queryBuilder != null) { - return new TranslationAwareExpressionQuery(source(), queryBuilder); - } - throw new IllegalStateException("Missing queryBuilder"); + return queryBuilder != null + ? new TranslationAwareExpressionQuery(source(), queryBuilder()) + : translate(pushdownPredicates, handler); + } + + private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { + var fieldAttribute = fieldAsFieldAttribute(field()); + Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument"); + String fieldName = getNameFromFieldAttribute(fieldAttribute); + Object query = str().fold(FoldContext.small()); + // Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided + return new MatchQuery(source(), fieldName, query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true)); } Expression field() { From 1ca0b583eb9bad45ff8ff4e370fd80b39a620fc8 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 11 Aug 2025 14:12:56 +0200 Subject: [PATCH 14/44] Solve params via fold --- .../scalar/string/ExtractSnippets.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 8116514188a53..6570cda02f87d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -202,6 +202,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { List shardContexts = toEvaluator.shardContexts(); LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; + Integer numSnippets = this.numSnippets == null ?DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); + Integer snippedSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small()); + int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { SearchExecutionContext searchExecutionContext = shardContext.searchExecutionContext(); @@ -210,6 +213,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { throw new IllegalStateException("Missing search context, cannot extract snippets"); } + try { // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch HighlightBuilder highlightBuilder = new HighlightBuilder(); @@ -220,9 +224,10 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { highlightBuilder.field(field.sourceText()).preTags("").postTags(""); // Return highest scoring fragments highlightBuilder.order(HighlightBuilder.Order.SCORE); - highlightBuilder.numOfFragments(Integer.parseInt(numSnippets.sourceText())); - highlightBuilder.fragmentSize(Integer.parseInt(snippetLength.sourceText())); - highlightBuilder.noMatchSize(Integer.parseInt(snippetLength.sourceText())); + + highlightBuilder.numOfFragments(numSnippets); + highlightBuilder.fragmentSize(snippedSize); + highlightBuilder.noMatchSize(snippedSize); SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext); searchContext.highlight(highlightContext); @@ -234,9 +239,9 @@ public 
ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + "], str [" + str.sourceText() + "], numSnippets: [" - + Integer.parseInt(numSnippets.sourceText()) + + numSnippets + "], snippetLength: [" - + Integer.parseInt(snippetLength.sourceText()) + + snippetLength + "]", e ); @@ -246,10 +251,8 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { } // Get field name and search context from the first shard context String fieldNameStr = field.sourceText(); - int numFragments = numSnippets == null ? DEFAULT_NUM_SNIPPETS : Integer.parseInt(numSnippets.sourceText()); - int fragmentSize = snippetLength == null ? DEFAULT_SNIPPET_LENGTH : Integer.parseInt(snippetLength.sourceText()); SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext(); - return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numFragments, fragmentSize, firstSearchContext); + return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numSnippets, snippedSize, firstSearchContext); } @Override From 34c10f5c80bfc2187646e63db721c451f5246595 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 11 Aug 2025 14:13:34 +0200 Subject: [PATCH 15/44] Use SORT to push down the EVAL clause, so it's executed on local nodes --- .../org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java index aea176b48e000..1637c2476bd38 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java @@ -31,6 +31,7 @@ public void testExtractSnippets() { var query = """ FROM test | EVAL x 
= extract_snippets(content, "fox", 1, 10) + | SORT x | KEEP x """; From 02cebe76df477f85a337e0520ce03e29df915211 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 11 Aug 2025 19:50:55 +0000 Subject: [PATCH 16/44] [CI] Auto commit changes from spotless --- .../compute/lucene/LuceneQueryExpressionEvaluator.java | 4 +--- .../compute/lucene/LuceneQueryExpressionEvaluatorTests.java | 1 - .../xpack/esql/capabilities/RewriteableAware.java | 2 +- .../expression/function/scalar/string/ExtractSnippets.java | 3 +-- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java index c08f9b8822925..c249620060685 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java @@ -27,9 +27,7 @@ * a {@link BooleanVector}. 
* @see LuceneQueryScoreEvaluator */ -public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator - implements - EvalOperator.ExpressionEvaluator { +public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator { LuceneQueryExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shards) { super(blockFactory, shards); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java index f6808962fcdf6..616679669b46f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java @@ -8,7 +8,6 @@ package org.elasticsearch.compute.lucene; import org.apache.lucene.search.Scorable; -import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.Page; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index d4142833298c7..097bee3a89343 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -10,7 +10,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; -public interface RewriteableAware extends TranslationAware{ +public interface RewriteableAware extends TranslationAware { QueryBuilder queryBuilder(); diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 6570cda02f87d..78972373896b2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -202,7 +202,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { List shardContexts = toEvaluator.shardContexts(); LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; - Integer numSnippets = this.numSnippets == null ?DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); + Integer numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); Integer snippedSize = this.snippetLength == null ? 
DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small()); int i = 0; @@ -213,7 +213,6 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { throw new IllegalStateException("Missing search context, cannot extract snippets"); } - try { // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch HighlightBuilder highlightBuilder = new HighlightBuilder(); From b923a2ec800c40c701974c4750548f27385fd5de Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 12 Aug 2025 14:32:14 -0400 Subject: [PATCH 17/44] Workaround for rewrite --- .../highlight/DefaultHighlighter.java | 4 -- .../HighlighterExpressionEvaluator.java | 43 ++++++++++++------- .../fulltext/QueryBuilderResolver.java | 3 +- .../scalar/string/ExtractSnippets.java | 5 ++- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 927ad0f5ad434..0424cdfc7e098 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -60,10 +60,6 @@ public boolean canHighlight(MappedFieldType fieldType) { @Override public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { @SuppressWarnings("unchecked") - // Map cache = (Map) fieldContext.cache.computeIfAbsent( - // UnifiedHighlighter.class.getName(), - // k -> new HashMap<>() - // ); Map cache = (Map) fieldContext.cache.getOrDefault( UnifiedHighlighter.class.getName(), new HashMap<>() diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index e7112249d89b6..1c708a6e3a4e3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -86,6 +86,22 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d throws IOException { // TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes? + + // Create a source loader for highlighter use + SourceLoader sourceLoader = searchContext.newSourceLoader(null); + FetchContext fetchContext = new FetchContext(searchContext, sourceLoader); + MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName); + SearchHit searchHit = new SearchHit(docId); + Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); + String defaultHighlighter = fieldType.getDefaultHighlighter(); + + Highlighter highlighter; + // if (SemanticTextHighlighter.NAME.equals(defaultHighlighter)) { + // highlighter = new SemanticTextHighlighter(); + // } else { + highlighter = new DefaultHighlighter(); + // } + SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS); optionsBuilder.fragmentCharSize(fragmentLength != null ? 
fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE); @@ -94,12 +110,6 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d optionsBuilder.requireFieldMatch(false); optionsBuilder.scoreOrdered(true); SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build()); - // Create a source loader for highlighter use - SourceLoader sourceLoader = searchContext.newSourceLoader(null); - FetchContext fetchContext = new FetchContext(searchContext, sourceLoader); - MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName); - SearchHit searchHit = new SearchHit(docId); - Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null); FieldHighlightContext highlightContext = new FieldHighlightContext( @@ -111,18 +121,19 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d query, new HashMap<>() ); - Highlighter highlighter = new DefaultHighlighter(); HighlightField highlight = highlighter.highlight(highlightContext); - boolean multivalued = highlight.fragments().length > 1; - if (multivalued) { - builder.beginPositionEntry(); - } - for (Text highlightText : highlight.fragments()) { - builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes())); - } - if (multivalued) { - builder.endPositionEntry(); + if (highlight != null) { + boolean multivalued = highlight.fragments().length > 1; + if (multivalued) { + builder.beginPositionEntry(); + } + for (Text highlightText : highlight.fragments()) { + builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes())); + } + if (multivalued) { + builder.endPositionEntry(); + } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index 159d179e9fd7b..4a33168be0b31 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -90,7 +90,8 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() : builder; try { - builder = builder.rewrite(ctx); + // builder = builder.rewrite(ctx); + builder = Rewriteable.rewrite(builder, ctx); } catch (IOException e) { exceptionHolder.setIfAbsent(e); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 78972373896b2..33aabe4682331 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -15,6 +15,7 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; @@ -217,7 +218,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { // TODO: Reduce duplication between this method and 
TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch HighlightBuilder highlightBuilder = new HighlightBuilder(); if (queryBuilder != null) { - highlightBuilder.highlightQuery(queryBuilder); + // TODO validate this works and determine why this is not working in query builder resolver + QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); + highlightBuilder.highlightQuery(rewritten); } // Stripping pre/post tags as they're not useful for snippet creation highlightBuilder.field(field.sourceText()).preTags("").postTags(""); From 5b9347cf978b5741979ca624f3a0be3af2b9e3a4 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 12 Aug 2025 14:47:47 -0400 Subject: [PATCH 18/44] Make highlighters accessible --- .../elasticsearch/search/SearchModule.java | 18 ++++++++++++++++ .../highlight/SearchHighlightContext.java | 2 +- .../HighlighterExpressionEvaluator.java | 21 +++++++++---------- .../xpack/esql/evaluator/EvalMapper.java | 12 +++++++++++ .../evaluator/mapper/EvaluatorMapper.java | 6 ++++++ .../scalar/string/ExtractSnippets.java | 9 +++++++- .../xpack/esql/plugin/EsqlPlugin.java | 7 ++++++- 7 files changed, 61 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 56b203700b362..f93597b0f3e68 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -278,6 +278,8 @@ * Sets up things that can be done at search time like queries, aggregations, and suggesters. 
*/ public class SearchModule { + private static volatile Map staticHighlighters = Map.of(); + public static final Setting INDICES_MAX_CLAUSE_COUNT_SETTING = Setting.intSetting( "indices.query.bool.max_clause_count", 4096, @@ -920,6 +922,20 @@ private static Map setupHighlighters(Settings settings, Lis return unmodifiableMap(highlighters.getRegistry()); } + /** + * Sets the static highlighters map for access by other plugins + */ + private static void setStaticHighlighters(Map highlighters) { + staticHighlighters = Map.copyOf(highlighters); + } + + /** + * Gets the static highlighters map for other plugin access + */ + public static Map getStaticHighlighters() { + return staticHighlighters; + } + private void registerScoreFunctions(List plugins) { // ScriptScoreFunctionBuilder has it own named writable because of a new script_score query namedWriteables.add( @@ -1059,6 +1075,8 @@ private void registerFetchSubPhases(List plugins) { registerFetchSubPhase(new HighlightPhase(highlighters)); registerFetchSubPhase(new FetchScorePhase()); + setStaticHighlighters(highlighters); + FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters); registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase); } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java index f06c667a073ba..111805be5b905 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/SearchHighlightContext.java @@ -269,7 +269,7 @@ Builder boundaryScannerLocale(Locale boundaryScannerLocale) { return this; } - Builder highlightQuery(Query highlightQuery) { + public Builder highlightQuery(Query highlightQuery) { fieldOptions.highlightQuery = highlightQuery; return this; } 
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 1c708a6e3a4e3..122e9ffe7cb1e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -50,6 +50,7 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters; HighlighterExpressionEvaluator( BlockFactory blockFactory, @@ -57,13 +58,15 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters ) { super(blockFactory, shardConfigs); this.fieldName = fieldName; this.numFragments = numFragments; this.fragmentLength = fragmentLength; this.searchContext = searchContext; + this.highlighters = highlighters; } @Override @@ -93,14 +96,7 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName); SearchHit searchHit = new SearchHit(docId); Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); - String defaultHighlighter = fieldType.getDefaultHighlighter(); - - Highlighter highlighter; - // if (SemanticTextHighlighter.NAME.equals(defaultHighlighter)) { - // highlighter = new SemanticTextHighlighter(); - // } else { - highlighter = new DefaultHighlighter(); - // } + Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter()); SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); optionsBuilder.numberOfFragments(numFragments != null ? 
numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS); @@ -109,6 +105,7 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d optionsBuilder.postTags(new String[] { "" }); optionsBuilder.requireFieldMatch(false); optionsBuilder.scoreOrdered(true); + optionsBuilder.highlightQuery(query); SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build()); FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null); @@ -165,7 +162,8 @@ public record Factory( String fieldName, Integer numFragments, Integer fragmentSize, - SearchContext searchContext + SearchContext searchContext, + Map highlighters ) implements EvalOperator.ExpressionEvaluator.Factory { @Override public EvalOperator.ExpressionEvaluator get(DriverContext context) { @@ -175,7 +173,8 @@ public EvalOperator.ExpressionEvaluator get(DriverContext context) { fieldName, numFragments, fragmentSize, - searchContext + searchContext, + highlighters ); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java index d054a8cecb072..ac172eb7aa2ab 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java @@ -29,9 +29,16 @@ import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.InsensitiveEqualsMapper; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.ShardContext; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; +import 
org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; +import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.xpack.esql.planner.Layout; +import java.util.HashMap; import java.util.List; +import java.util.Map; public final class EvalMapper { @@ -79,6 +86,11 @@ public FoldContext foldCtx() { public List shardContexts() { return shardContexts; } + + @Override + public Map highlighters() { + return SearchModule.getStaticHighlighters(); + } }); } for (ExpressionMapper em : MAPPERS) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java index a4a17297abc09..3f561f22c4c24 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java @@ -17,6 +17,7 @@ import org.elasticsearch.indices.breaker.AllCircuitBreakerStats; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.indices.breaker.CircuitBreakerStats; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -25,6 +26,7 @@ import org.elasticsearch.xpack.esql.planner.Layout; import java.util.List; +import java.util.Map; import static org.elasticsearch.compute.data.BlockUtils.fromArrayRow; import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; @@ -41,6 +43,10 @@ interface ToEvaluator { default List shardContexts() { throw new UnsupportedOperationException("Shard contexts should only be needed for evaluation operations"); } + + default Map highlighters() { + throw new UnsupportedOperationException("Highlighters should only be 
needed for highlight operations"); + } } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 33aabe4682331..9cdaac96d3a34 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -254,7 +254,14 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { // Get field name and search context from the first shard context String fieldNameStr = field.sourceText(); SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext(); - return new HighlighterExpressionEvaluator.Factory(shardConfigs, fieldNameStr, numSnippets, snippedSize, firstSearchContext); + return new HighlighterExpressionEvaluator.Factory( + shardConfigs, + fieldNameStr, + numSnippets, + snippedSize, + firstSearchContext, + toEvaluator.highlighters() + ); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index f2f5b6b640311..a2aecaec98ac9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -42,6 +42,9 @@ import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.ExtensiblePlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import 
org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.threadpool.ExecutorBuilder; @@ -82,12 +85,14 @@ import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.function.Predicate; import java.util.function.Supplier; -public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin { +public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin { public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled(); public static final String ESQL_WORKER_THREAD_POOL_NAME = "esql_worker"; From 44b1bc45edee6d630d535391d90379add82f1392 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Aug 2025 19:49:49 +0000 Subject: [PATCH 19/44] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/esql/evaluator/EvalMapper.java | 8 ++------ .../org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 4 ---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java index ac172eb7aa2ab..642111d5d480b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java @@ -19,6 +19,8 @@ import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; import 
org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -29,14 +31,8 @@ import org.elasticsearch.xpack.esql.expression.predicate.logical.BinaryLogic; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.InsensitiveEqualsMapper; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders.ShardContext; -import org.elasticsearch.search.SearchModule; -import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; -import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.xpack.esql.planner.Layout; -import java.util.HashMap; import java.util.List; import java.util.Map; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index a2aecaec98ac9..47e7afc88f643 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -43,8 +43,6 @@ import org.elasticsearch.plugins.ExtensiblePlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; -import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.threadpool.ExecutorBuilder; @@ -85,9 +83,7 @@ import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.function.Predicate; import 
java.util.function.Supplier; From 82412d8c9d183e058a8240c341f224ec0c000dc0 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 12 Aug 2025 15:51:01 -0400 Subject: [PATCH 20/44] Return semantic highlight results --- .../xpack/inference/highlight/SemanticTextHighlighter.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java index 92333a10c4d08..cfbeb4718a6f6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java @@ -21,6 +21,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.search.join.ToParentBlockJoinQuery; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; @@ -304,6 +305,8 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { public void visitLeaf(Query query) { if (query instanceof MatchAllDocsQuery) { queries.add(new MatchAllDocsQuery()); + } else if (query instanceof ToParentBlockJoinQuery toParentBlockJoinQuery) { + queries.add(toParentBlockJoinQuery.getChildQuery()); } } }); From d4ba21de604162bb11a5fd0df80c4f7addcc518d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Aug 2025 20:33:46 +0000 Subject: [PATCH 21/44] [CI] Auto commit changes from spotless --- .../esql/expression/function/fulltext/FullTextFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index 79717ffa4ebb8..a9255cb11dba3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -16,8 +16,8 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; -import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; From 932864a010a4f3439855033a0ab96a36a56d79fc Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 13 Aug 2025 13:54:51 -0400 Subject: [PATCH 22/44] Cleanup --- .../java/org/elasticsearch/search/SearchHit.java | 2 +- .../java/org/elasticsearch/search/SearchModule.java | 13 ++----------- .../subphase/highlight/DefaultHighlighter.java | 4 ++-- .../fetch/subphase/highlight/HighlightPhase.java | 1 - .../lucene/HighlighterExpressionEvaluator.java | 5 ++++- .../xpack/esql/capabilities/RewriteableAware.java | 12 ++++++++++++ .../expression/function/EsqlFunctionRegistry.java | 1 - .../function/fulltext/QueryBuilderResolver.java | 2 +- .../function/scalar/string/ExtractSnippets.java | 10 +++++----- .../elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 1 + 10 files changed, 28 insertions(+), 23 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java index a9c8e01fa32ac..41ba6e2099ff4 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchHit.java +++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java @@ -123,7 +123,7 @@ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity) { this(nestedTopDocId, id, nestedIdentity, null); } - private SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) { + public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) { this( nestedTopDocId, DEFAULT_SCORE, diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index d53a9a44a74b6..0509c9e78eac7 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -923,16 +923,6 @@ private static Map setupHighlighters(Settings settings, Lis return unmodifiableMap(highlighters.getRegistry()); } - /** - * Sets the static highlighters map for access by other plugins - */ - private static void setStaticHighlighters(Map highlighters) { - staticHighlighters = Map.copyOf(highlighters); - } - - /** - * Gets the static highlighters map for other plugin access - */ public static Map getStaticHighlighters() { return staticHighlighters; } @@ -1076,7 +1066,8 @@ private void registerFetchSubPhases(List plugins) { registerFetchSubPhase(new HighlightPhase(highlighters)); registerFetchSubPhase(new FetchScorePhase()); - setStaticHighlighters(highlighters); + // Store highlighters in a static map for other plugins to access + staticHighlighters = Map.copyOf(highlighters); FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters); registerFromPlugin(plugins, p -> 
p.getFetchSubPhases(context), this::registerFetchSubPhase); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 0424cdfc7e098..9ae3a1349510e 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -60,9 +60,9 @@ public boolean canHighlight(MappedFieldType fieldType) { @Override public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { @SuppressWarnings("unchecked") - Map cache = (Map) fieldContext.cache.getOrDefault( + Map cache = (Map) fieldContext.cache.computeIfAbsent( UnifiedHighlighter.class.getName(), - new HashMap<>() + k -> new HashMap<>() ); if (cache.containsKey(fieldContext.fieldName) == false) { cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index bd0bddea0261d..cf9e8fbf7ded0 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -66,7 +66,6 @@ public void process(HitContext hitContext) throws IOException { Map> contextBuilders = fieldContext.builders; for (String field : contextBuilders.keySet()) { FieldHighlightContext fieldContext = contextBuilders.get(field).apply(hitContext); - // TODO create this in ES|QL when processing matches Highlighter highlighter = getHighlighter(fieldContext.field, fieldContext.fieldType); HighlightField highlightField = highlighter.highlight(fieldContext); if (highlightField != null) { diff --git 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 122e9ffe7cb1e..b8278918ce745 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -42,6 +42,8 @@ import java.util.Map; import java.util.function.Supplier; +import static org.elasticsearch.core.RefCounted.ALWAYS_REFERENCED; + public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator implements EvalOperator.ExpressionEvaluator { @@ -94,10 +96,11 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d SourceLoader sourceLoader = searchContext.newSourceLoader(null); FetchContext fetchContext = new FetchContext(searchContext, sourceLoader); MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName); - SearchHit searchHit = new SearchHit(docId); + SearchHit searchHit = new SearchHit(docId, null, null, ALWAYS_REFERENCED); Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter()); + // TODO: Consolidate these options with the ones built in the text similarity reranker SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS); optionsBuilder.fragmentCharSize(fragmentLength != null ? 
fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index 097bee3a89343..7841a13b64966 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -10,10 +10,22 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; +/** + * Defines objects that need to go through the rewrite phase. + */ public interface RewriteableAware extends TranslationAware { + /** + * @return The current active query builder. + */ QueryBuilder queryBuilder(); + /** + * Replaces the current query builder with a rewritten iteration. This happens multiple times through the rewrite phase until + * the final iteration of the query builder is stored. 
+ * @param queryBuilder QueryBuilder + * @return Expression defining the active QueryBuilder + */ Expression replaceQueryBuilder(QueryBuilder queryBuilder); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 7bb11ae54d967..67637e5db5a53 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -505,7 +505,6 @@ private static FunctionDefinition[][] snapshotFunctions() { def(DotProduct.class, DotProduct::new, "v_dot_product"), def(L1Norm.class, L1Norm::new, "v_l1_norm"), def(L2Norm.class, L2Norm::new, "v_l2_norm"), - def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), def(ExtractSnippets.class, quad(ExtractSnippets::new), "extract_snippets") } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index 4a33168be0b31..ff94211e07963 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -90,7 +90,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() : builder; try { - // builder = builder.rewrite(ctx); + // TODO: Even when changing this to Rewriteable#rewrite, this still doesn't execute the full rewrite phase. Bug? 
builder = Rewriteable.rewrite(builder, ctx); } catch (IOException e) { exceptionHolder.setIfAbsent(e); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 9cdaac96d3a34..6a9d5626f076b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -67,9 +67,10 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum ); private static final int DEFAULT_NUM_SNIPPETS = 1; - private static final int DEFAULT_SNIPPET_LENGTH = 10; // TODO determine a good default. 512 * 5? + // TODO: This default should be in line with the text similarity reranker. Set artificially low here for POC purposes. + private static final int DEFAULT_SNIPPET_LENGTH = 10; - // TODO better names? + // TODO: better names? private final Expression field, str, numSnippets, snippetLength; private final QueryBuilder queryBuilder; @@ -218,13 +219,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch HighlightBuilder highlightBuilder = new HighlightBuilder(); if (queryBuilder != null) { - // TODO validate this works and determine why this is not working in query builder resolver + // TODO: Ideally we'd only need to rewrite in the QueryBuilderResolver, but we need semantic rewrites to happen + // on both coordinator and data nodes. 
QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); highlightBuilder.highlightQuery(rewritten); } - // Stripping pre/post tags as they're not useful for snippet creation highlightBuilder.field(field.sourceText()).preTags("").postTags(""); - // Return highest scoring fragments highlightBuilder.order(HighlightBuilder.Order.SCORE); highlightBuilder.numOfFragments(numSnippets); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 98e8ef61b4b12..94f293ce0f5d0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -89,6 +89,7 @@ import java.util.function.Predicate; import java.util.function.Supplier; +// NOTE: SearchPlugin required to get access to highlighters public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin { public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled(); From 632df21b6dca1b6ebd7962daeccd2d594ae24c99 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 13 Aug 2025 19:09:12 +0000 Subject: [PATCH 23/44] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/esql/capabilities/RewriteableAware.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java index 7841a13b64966..41886573fdbb2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/capabilities/RewriteableAware.java @@ -24,7 +24,7 @@ public interface RewriteableAware 
extends TranslationAware { * Replaces the current query builder with a rewritten iteration. This happens multiple times through the rewrite phase until * the final iteration of the query builder is stored. * @param queryBuilder QueryBuilder - * @return Expression defining the active QueryBuilder + * @return Expression defining the active QueryBuilder */ Expression replaceQueryBuilder(QueryBuilder queryBuilder); From 838b054c8caef67c471673cd3230d4a084867d1e Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 14 Aug 2025 14:59:00 -0400 Subject: [PATCH 24/44] Move highlighters from EvalMapper to SearchContext --- .../org/elasticsearch/search/internal/SearchContext.java | 6 ++++++ .../org/elasticsearch/xpack/esql/evaluator/EvalMapper.java | 5 ----- .../xpack/esql/evaluator/mapper/EvaluatorMapper.java | 6 ------ .../expression/function/scalar/string/ExtractSnippets.java | 4 +++- .../org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 3 +-- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java index 7d018a7ef4ba9..cb3ddb7deb5cc 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -28,6 +28,7 @@ import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.search.RescoreDocIds; import org.elasticsearch.search.SearchExtBuilder; +import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.collapse.CollapseContext; @@ -40,6 +41,7 @@ import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.fetch.subphase.InnerHitsContext; import org.elasticsearch.search.fetch.subphase.ScriptFieldsContext; +import 
org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.search.profile.Profilers; @@ -152,6 +154,10 @@ public final boolean isClosed() { public abstract void highlight(SearchHighlightContext highlight); + public Map highlighters() { + return SearchModule.getStaticHighlighters(); + } + public InnerHitsContext innerHits() { if (innerHitsContext == null) { innerHitsContext = new InnerHitsContext(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java index 642111d5d480b..a6b01c34d5817 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java @@ -82,11 +82,6 @@ public FoldContext foldCtx() { public List shardContexts() { return shardContexts; } - - @Override - public Map highlighters() { - return SearchModule.getStaticHighlighters(); - } }); } for (ExpressionMapper em : MAPPERS) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java index 3f561f22c4c24..a4a17297abc09 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/mapper/EvaluatorMapper.java @@ -17,7 +17,6 @@ import org.elasticsearch.indices.breaker.AllCircuitBreakerStats; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.indices.breaker.CircuitBreakerStats; -import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import 
org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -26,7 +25,6 @@ import org.elasticsearch.xpack.esql.planner.Layout; import java.util.List; -import java.util.Map; import static org.elasticsearch.compute.data.BlockUtils.fromArrayRow; import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; @@ -43,10 +41,6 @@ interface ToEvaluator { default List shardContexts() { throw new UnsupportedOperationException("Shard contexts should only be needed for evaluation operations"); } - - default Map highlighters() { - throw new UnsupportedOperationException("Highlighters should only be needed for highlight operations"); - } } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 6a9d5626f076b..cd53d3d67e86c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -18,6 +18,7 @@ import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; @@ -254,13 +255,14 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { // Get field name and search context from the first shard context String fieldNameStr = field.sourceText(); SearchContext 
firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext(); + Map highlighters = firstSearchContext == null ? Map.of() : firstSearchContext.highlighters(); return new HighlighterExpressionEvaluator.Factory( shardConfigs, fieldNameStr, numSnippets, snippedSize, firstSearchContext, - toEvaluator.highlighters() + highlighters ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 94f293ce0f5d0..5dfa46f369662 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -89,8 +89,7 @@ import java.util.function.Predicate; import java.util.function.Supplier; -// NOTE: SearchPlugin required to get access to highlighters -public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin, SearchPlugin { +public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin { public static final boolean INLINESTATS_FEATURE_FLAG = new FeatureFlag("esql_inlinestats").isEnabled(); public static final String ESQL_WORKER_THREAD_POOL_NAME = "esql_worker"; From 0b0487e2556a7c73117a6ecad52aa7acae4d82c9 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 14 Aug 2025 15:06:00 -0400 Subject: [PATCH 25/44] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java Co-authored-by: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com> --- .../esql/expression/function/fulltext/QueryBuilderResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index ff94211e07963..22df04eb9e823 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -84,7 +84,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { Holder updated = new Holder<>(false); LogicalPlan newPlan = plan.transformExpressionsDown(Expression.class, expr -> { Expression finalExpression = expr; - if (expr instanceof RewriteableAware rewriteableAware && expr instanceof TranslationAware translationAware) { + if (expr instanceof RewriteableAware rewriteableAware) { QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder; builder = builder == null ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() From eee88bec15b31530a88223b12eebb673a0d63054 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 14 Aug 2025 19:13:30 +0000 Subject: [PATCH 26/44] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/evaluator/EvalMapper.java | 3 --- .../expression/function/fulltext/QueryBuilderResolver.java | 1 - .../java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java | 1 - 3 files changed, 5 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java index a6b01c34d5817..d054a8cecb072 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java @@ -19,8 +19,6 @@ import org.elasticsearch.compute.operator.DriverContext; import 
org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.core.Releasables; -import org.elasticsearch.search.SearchModule; -import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -34,7 +32,6 @@ import org.elasticsearch.xpack.esql.planner.Layout; import java.util.List; -import java.util.Map; public final class EvalMapper { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index 22df04eb9e823..592531b33ac10 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -13,7 +13,6 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.xpack.esql.capabilities.RewriteableAware; -import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 5dfa46f369662..776874fbf90f6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ 
-43,7 +43,6 @@ import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.ExtensiblePlugin; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.threadpool.ExecutorBuilder; From 77b44d5de5d4130ac1cefe5a8eac285d68f4c1e5 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 14 Aug 2025 15:12:59 -0400 Subject: [PATCH 27/44] Cleanup how we pull field attributes in extract snippets --- .../scalar/string/ExtractSnippets.java | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index cd53d3d67e86c..d25bacb0dedff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -225,7 +225,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); highlightBuilder.highlightQuery(rewritten); } - highlightBuilder.field(field.sourceText()).preTags("").postTags(""); + highlightBuilder.field(fieldName()).preTags("").postTags(""); highlightBuilder.order(HighlightBuilder.Order.SCORE); highlightBuilder.numOfFragments(numSnippets); @@ -238,9 +238,9 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { } catch (IOException e) { throw new RuntimeException( "Failed to create highlight context for field [" - + field.sourceText() + + fieldName() + "], str [" - + str.sourceText() + + searchString() + "], numSnippets: [" + numSnippets + 
"], snippetLength: [" @@ -253,12 +253,11 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { shardConfigs[i++] = new LuceneQueryEvaluator.ShardConfig(shardContext.toQuery(queryBuilder), shardContext.searcher()); } // Get field name and search context from the first shard context - String fieldNameStr = field.sourceText(); - SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.get(0).searchContext(); + SearchContext firstSearchContext = shardContexts.isEmpty() ? null : shardContexts.getFirst().searchContext(); Map highlighters = firstSearchContext == null ? Map.of() : firstSearchContext.highlighters(); return new HighlighterExpressionEvaluator.Factory( shardConfigs, - fieldNameStr, + fieldName(), numSnippets, snippedSize, firstSearchContext, @@ -290,22 +289,31 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand } private Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { - var fieldAttribute = fieldAsFieldAttribute(field()); - Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument"); - String fieldName = getNameFromFieldAttribute(fieldAttribute); Object query = str().fold(FoldContext.small()); // Make query lenient so mixed field types can be queried when a field type is incompatible with the value provided - return new MatchQuery(source(), fieldName, query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true)); + return new MatchQuery(source(), fieldName(), query, Map.of(MatchQueryBuilder.LENIENT_FIELD.getPreferredName(), true)); } Expression field() { return field; } + private String fieldName() { + var fieldAttribute = fieldAsFieldAttribute(field()); + Check.notNull(fieldAttribute, "Highlight must have a field attribute as the first argument"); + return getNameFromFieldAttribute(fieldAttribute); + } + Expression str() { return str; } + private String searchString() { + var strAttribute = 
fieldAsFieldAttribute(str()); + Check.notNull(strAttribute, "Highlight must have a str attribute as the second argument"); + return getNameFromFieldAttribute(strAttribute); + } + Expression numSnippets() { return numSnippets; } From 5ab3c568345c9e2809f8b704796fb127db1ccd90 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 14 Aug 2025 15:30:29 -0400 Subject: [PATCH 28/44] Fix compilation error due to auto-commit suggestion --- .../esql/expression/function/fulltext/QueryBuilderResolver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java index 592531b33ac10..cbc2f598abb60 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/QueryBuilderResolver.java @@ -86,7 +86,7 @@ public FunctionsRewritable rewrite(QueryRewriteContext ctx) throws IOException { if (expr instanceof RewriteableAware rewriteableAware) { QueryBuilder builder = rewriteableAware.queryBuilder(), initial = builder; builder = builder == null - ? translationAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() + ? rewriteableAware.asQuery(LucenePushdownPredicates.DEFAULT, TranslatorHandler.TRANSLATOR_HANDLER).toQueryBuilder() : builder; try { // TODO: Even when changing this to Rewriteable#rewrite, this still doesn't execute the full rewrite phase. Bug? 
From a6a0f11277d1d71e439d2ce0569b7d5045246e66 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 18 Aug 2025 11:05:36 -0400 Subject: [PATCH 29/44] Add queryBuilder to ExtractSnippets#info --- .../esql/expression/function/scalar/string/ExtractSnippets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index d25bacb0dedff..10811fa9dd628 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -197,7 +197,7 @@ public Expression replaceChildren(List newChildren) { @Override protected NodeInfo info() { - return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength); + return NodeInfo.create(this, ExtractSnippets::new, field, str, numSnippets, snippetLength, queryBuilder); } @Override From 9c7609c653fbcbaf79440cce1b3ad72a494c29b0 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 18 Aug 2025 11:15:53 -0400 Subject: [PATCH 30/44] Move construction of objects to ctor when possible --- .../lucene/HighlighterExpressionEvaluator.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index b8278918ce745..230e6e36ff752 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ 
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -22,6 +22,7 @@ import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -51,8 +52,9 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters; + private final FetchContext fetchContext; + private final MappedFieldType fieldType; HighlighterExpressionEvaluator( BlockFactory blockFactory, @@ -67,8 +69,16 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator Date: Mon, 18 Aug 2025 11:39:12 -0400 Subject: [PATCH 31/44] Refactor highlighting logic into util class --- .../highlight/HighlightSnippetUtils.java | 60 +++++++++++++++++++ .../HighlighterExpressionEvaluator.java | 19 +++--- ...nkingRankFeaturePhaseRankShardContext.java | 19 +++--- 3 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java new file mode 100644 index 0000000000000..bb7cf4ba0e675 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.search.fetch.subphase.highlight; + +import org.apache.lucene.search.Query; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.io.IOException; +import java.util.List; + +/** + * Utility class for building highlighting queries for the purpose of extracting snippets. + */ +public class HighlightSnippetUtils { + + public static SearchHighlightContext buildSearchHighlightContextForSnippets( + SearchExecutionContext searchExecutionContext, + String field, + int numSnippets, + int snippetCharLength, + QueryBuilder queryBuilder + ) throws IOException { + SearchHighlightContext.Field highlightField = buildFieldHighlightContextForSnippets( + searchExecutionContext, + field, + numSnippets, + snippetCharLength, + queryBuilder.toQuery(searchExecutionContext) + ); + return new SearchHighlightContext(List.of(highlightField)); + } + + public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets( + SearchExecutionContext searchExecutionContext, + String fieldName, + int numSnippets, + int snippetCharLength, + Query query + ) { + SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); + optionsBuilder.numberOfFragments(numSnippets); + optionsBuilder.fragmentCharSize(snippetCharLength); + optionsBuilder.noMatchSize(snippetCharLength); + optionsBuilder.preTags(new String[] { "" }); + optionsBuilder.postTags(new String[] { "" }); + optionsBuilder.requireFieldMatch(false); + optionsBuilder.scoreOrdered(true); + optionsBuilder.highlightQuery(query); + return new 
SearchHighlightContext.Field(fieldName, optionsBuilder.build()); + } + +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 230e6e36ff752..625df8768d6ef 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -30,6 +30,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.SearchContext; @@ -106,17 +107,13 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId)); Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter()); - // TODO: Consolidate these options with the ones built in the text similarity reranker - SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); - optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS); - optionsBuilder.fragmentCharSize(fragmentLength != null ? 
fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE); - optionsBuilder.preTags(new String[] { "" }); - optionsBuilder.postTags(new String[] { "" }); - optionsBuilder.requireFieldMatch(false); - optionsBuilder.scoreOrdered(true); - optionsBuilder.highlightQuery(query); - SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build()); - + SearchHighlightContext.Field field = HighlightSnippetUtils.buildFieldHighlightContextForSnippets( + fetchContext.getSearchExecutionContext(), + fieldName, + numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS, + fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE, + query + ); FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null); FieldHighlightContext highlightContext = new FieldHighlightContext( fieldName, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index 66fb4a366a757..5c3ae35f72ea2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -12,8 +12,8 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils; 
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.rank.RankShardResult; @@ -73,20 +73,17 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI public void prepareForFetch(SearchContext context) { if (snippetRankInput != null) { try { - HighlightBuilder highlightBuilder = new HighlightBuilder(); - highlightBuilder.highlightQuery(snippetRankInput.snippetQueryBuilder()); - // Stripping pre/post tags as they're not useful for snippet creation - highlightBuilder.field(field).preTags("").postTags(""); - // Return highest scoring fragments - highlightBuilder.order(HighlightBuilder.Order.SCORE); int numSnippets = snippetRankInput.numSnippets() != null ? snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS; - highlightBuilder.numOfFragments(numSnippets); // Rely on the model to determine the fragment size int tokenSizeLimit = snippetRankInput.tokenSizeLimit(); int fragmentSize = tokenSizeLimit * TOKEN_SIZE_LIMIT_MULTIPLIER; - highlightBuilder.fragmentSize(fragmentSize); - highlightBuilder.noMatchSize(fragmentSize); - SearchHighlightContext searchHighlightContext = highlightBuilder.build(context.getSearchExecutionContext()); + SearchHighlightContext searchHighlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets( + context.getSearchExecutionContext(), + field, + numSnippets, + fragmentSize, + snippetRankInput.snippetQueryBuilder() + ); context.highlight(searchHighlightContext); } catch (IOException e) { throw new RuntimeException("Failed to generate snippet request", e); From 675e78b2aa05a958bf7270eebf430d227b343e64 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 18 Aug 2025 14:26:04 -0400 Subject: [PATCH 32/44] Fix EsqlNodeSubclassTests#testReplaceChildren --- .../expression/function/scalar/string/ExtractSnippets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 10811fa9dd628..fd74e6eda6975 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -191,7 +191,8 @@ public Expression replaceChildren(List newChildren) { newChildren.get(0), // field newChildren.get(1), // str numSnippets == null ? null : newChildren.get(2), - snippetLength == null ? null : newChildren.get(3) + snippetLength == null ? null : newChildren.get(3), + queryBuilder ); } From d5c9d9141282ac260f2978eaf92571c9766586fa Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 18 Aug 2025 16:24:24 -0400 Subject: [PATCH 33/44] Start adding CSV tests --- .../extract-snippets-function.csv-spec | 92 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../elasticsearch/xpack/esql/CsvTests.java | 4 + 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec new file mode 100644 index 0000000000000..db4df965f58ea --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec @@ -0,0 +1,92 @@ +############################################### +# Tests for ExtractSnippets function +# + +extractSnippetsWithField +required_capability: extract_snippets_function + +// tag::extract-snippets-with-field[] +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = 
extract_snippets(description, "hobbit", 1, 25) +// end::extract-snippets-with-field[] +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +// tag::extract-snippets-with-field-result[] +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit +// end::extract-snippets-with-field-result[] +; + +extractMultipleSnippetsWithField +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3, 25) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of The Hobbit, Tolkien's own children, The Hobbit] +2714 | J. R. R. 
Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Hobbit, , THE HOBBIT: AN UNEXPECTED, film adaptation of The Hobbit] +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit +; + + +extractMultipleSnippetsWithFieldMvExpand +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3, 25) +| MV_EXPAND snippets +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of The Hobbit +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, The Hobbit +2675 | J.R.R. 
Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit +; + +extractMultipleSnippetsWithSomeNoMatches +required_capability: extract_snippets_function + +FROM books +| WHERE MATCH(author, "Faulkner") +| EVAL snippets = extract_snippets(description, "slavery", 1, 10) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | abolition of slavery +2713 | William Faulkner | Collected Stories of William Faulkner | null +2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null +2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null +3293 | Danny Faulkner | Universe by Design | null +; + + +extractSnippetsWithDefaultNumSnippetsAndLength +required_capability: extract_snippets_function + +extractSnippetsCalledMultipleTimes +required_capability: extract_snippets_function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a09a9177203c4..95db01522e748 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1345,7 +1345,12 @@ public enum Cap { /** * Support correct counting of skipped shards. */ - CORRECT_SKIPPED_SHARDS_COUNT; + CORRECT_SKIPPED_SHARDS_COUNT, + + /** + * Support for the EXTRACT_SNIPPETS function. 
+ */ + EXTRACT_SNIPPETS_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index d149fb012a14b..ca4ba0eff830b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -335,6 +335,10 @@ public final void test() throws Throwable { "CSV tests cannot currently handle multi_match function that depends on Lucene", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.capabilityName()) ); + assumeFalse( + "CSV tests cannot currently handle EXTRACT_SNIPPETS", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.EXTRACT_SNIPPETS_FUNCTION.capabilityName()) + ); if (Build.current().isSnapshot()) { assertThat( From bd369f794f6ce7b01dee5047116f755e3a671d8e Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 19 Aug 2025 14:44:02 -0400 Subject: [PATCH 34/44] Fix initialization error --- .../extract-snippets-function.csv-spec | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec index db4df965f58ea..d512b1ce72a7e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec @@ -6,26 +6,40 @@ extractSnippetsWithField required_capability: extract_snippets_function // tag::extract-snippets-with-field[] +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +// end::extract-snippets-with-field[] +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 1 +; 
+ +// tag::extract-snippets-with-field-result[] +book_no:keyword | author:text | title:text | snippets:keyword +1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps the +// end::extract-snippets-with-field-result[] +; + +extractSnippetsWithMatch +required_capability: extract_snippets_function + FROM books | WHERE MATCH(description, "hobbit") | EVAL snippets = extract_snippets(description, "hobbit", 1, 25) -// end::extract-snippets-with-field[] | KEEP book_no, author, title, snippets | SORT book_no | LIMIT 5 ; -// tag::extract-snippets-with-field-result[] book_no:keyword | author:text | title:text | snippets:keyword 1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit 2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT 2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit 2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit 2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit -// end::extract-snippets-with-field-result[] ; -extractMultipleSnippetsWithField +extractMultipleSnippetsWithMatch required_capability: extract_snippets_function FROM books @@ -45,7 +59,7 @@ book_no:keyword | author:text | title:text ; -extractMultipleSnippetsWithFieldMvExpand +extractMultipleSnippetsWithMatchMvExpand required_capability: extract_snippets_function FROM books @@ -83,10 +97,3 @@ book_no:keyword | author:text | title:tex 2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null 3293 | Danny Faulkner | Universe by Design | null ; - - -extractSnippetsWithDefaultNumSnippetsAndLength -required_capability: extract_snippets_function - -extractSnippetsCalledMultipleTimes -required_capability: extract_snippets_function From 
ccda43d8214b55e85ac9c51eaf7e76980400d077 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 19 Aug 2025 14:58:31 -0400 Subject: [PATCH 35/44] Clean up duplication when creating highlighter --- .../scalar/string/ExtractSnippets.java | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index fd74e6eda6975..ee35ccfc9a14c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -17,7 +17,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.Rewriteable; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightSnippetUtils; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.internal.SearchContext; @@ -68,10 +68,9 @@ public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgum ); private static final int DEFAULT_NUM_SNIPPETS = 1; - // TODO: This default should be in line with the text similarity reranker. Set artificially low here for POC purposes. + // TODO: Determine good default, set artificially low for POC purposes private static final int DEFAULT_SNIPPET_LENGTH = 10; - // TODO: better names? 
private final Expression field, str, numSnippets, snippetLength; private final QueryBuilder queryBuilder; @@ -206,8 +205,8 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { List shardContexts = toEvaluator.shardContexts(); LuceneQueryEvaluator.ShardConfig[] shardConfigs = new LuceneQueryEvaluator.ShardConfig[shardContexts.size()]; - Integer numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); - Integer snippedSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small()); + int numSnippets = this.numSnippets == null ? DEFAULT_NUM_SNIPPETS : (Integer) this.numSnippets.fold(FoldContext.small()); + int snippetSize = this.snippetLength == null ? DEFAULT_SNIPPET_LENGTH : (Integer) this.snippetLength.fold(FoldContext.small()); int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { @@ -218,22 +217,16 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { } try { - // TODO: Reduce duplication between this method and TextSimilarityRerankingRankFeaturePhaseRankShardContext#prepareForFetch - HighlightBuilder highlightBuilder = new HighlightBuilder(); - if (queryBuilder != null) { - // TODO: Ideally we'd only need to rewrite in the QueryBuilderResolver, but we need semantic rewrites to happen - // on both coordinator and data nodes. 
- QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); - highlightBuilder.highlightQuery(rewritten); - } - highlightBuilder.field(fieldName()).preTags("").postTags(""); - highlightBuilder.order(HighlightBuilder.Order.SCORE); - - highlightBuilder.numOfFragments(numSnippets); - highlightBuilder.fragmentSize(snippedSize); - highlightBuilder.noMatchSize(snippedSize); - - SearchHighlightContext highlightContext = highlightBuilder.build(searchExecutionContext); + // We need to call rewrite here, to ensure we rewrite on both coordinator and data nodes. + assert queryBuilder != null : "ExtractSnippets missing required state"; + QueryBuilder rewritten = Rewriteable.rewrite(queryBuilder, searchExecutionContext); + SearchHighlightContext highlightContext = HighlightSnippetUtils.buildSearchHighlightContextForSnippets( + searchExecutionContext, + fieldName(), + numSnippets, + snippetSize, + rewritten + ); searchContext.highlight(highlightContext); } catch (IOException e) { @@ -260,7 +253,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { shardConfigs, fieldName(), numSnippets, - snippedSize, + snippetSize, firstSearchContext, highlighters ); From 35120e68cda4ca7e5b2611ccfcb38bda18e4d0e8 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 19 Aug 2025 15:34:08 -0400 Subject: [PATCH 36/44] Support default parameters when not specified --- .../scalar/string/ExtractSnippets.java | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index ee35ccfc9a14c..159070ddf11c1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -33,8 +33,8 @@ import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; -import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.TwoOptionalArguments; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -44,6 +44,7 @@ import org.elasticsearch.xpack.esql.querydsl.query.TranslationAwareExpressionQuery; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -60,7 +61,12 @@ /** * Extract snippets function, that extracts the most relevant snippets from a given input string */ -public class ExtractSnippets extends EsqlScalarFunction implements OptionalArgument, RewriteableAware, TranslationAware, EvaluatorMapper { +public class ExtractSnippets extends EsqlScalarFunction + implements + TwoOptionalArguments, + RewriteableAware, + TranslationAware, + EvaluatorMapper { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "ExtractSnippets", @@ -108,7 +114,7 @@ public ExtractSnippets( Expression snippetLength, QueryBuilder queryBuilder ) { - super(source, List.of(field, str, numSnippets, snippetLength)); + super(source, fields(field, str, numSnippets, snippetLength)); this.field = field; this.str = str; this.numSnippets = numSnippets; @@ -331,4 +337,17 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(field(), str(), 
numSnippets(), snippetLength(), queryBuilder()); } + + private static List fields(Expression field, Expression str, Expression numSnippets, Expression snippetLength) { + List list = new ArrayList<>(4); + list.add(field); + list.add(str); + if (numSnippets != null) { + list.add(numSnippets); + if (snippetLength != null) { + list.add(snippetLength); + } + } + return list; + } } From de46fef7f19d603e9e25e777c517fa7c2d1eed05 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 19 Aug 2025 16:35:18 -0400 Subject: [PATCH 37/44] Fix char encoding bug for text fields (not semantic_text) --- .../search/fetch/subphase/highlight/HighlightSnippetUtils.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java index bb7cf4ba0e675..ffba59dd1c47c 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java @@ -48,6 +48,9 @@ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); optionsBuilder.numberOfFragments(numSnippets); optionsBuilder.fragmentCharSize(snippetCharLength); + // Note: The default SENTENCE boundary scanner used by the DefaultHighlighter will return fragments larger than the specified + // snippetLength. This has implications when appending and calculating ByteArrays, so we specify WORD. 
+ optionsBuilder.boundaryScannerType(HighlightBuilder.BoundaryScannerType.WORD); optionsBuilder.noMatchSize(snippetCharLength); optionsBuilder.preTags(new String[] { "" }); optionsBuilder.postTags(new String[] { "" }); From 5f20480c0c9d0a66ac3e0fe8bd2d309f3814f865 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 20 Aug 2025 14:53:23 -0400 Subject: [PATCH 38/44] Truncate snippets that are longer than requested size --- .../highlight/HighlightSnippetUtils.java | 3 -- .../HighlighterExpressionEvaluator.java | 44 ++++++++++++++++--- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java index ffba59dd1c47c..bb7cf4ba0e675 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightSnippetUtils.java @@ -48,9 +48,6 @@ public static SearchHighlightContext.Field buildFieldHighlightContextForSnippets SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder(); optionsBuilder.numberOfFragments(numSnippets); optionsBuilder.fragmentCharSize(snippetCharLength); - // Note: The default SENTENCE boundary scanner used by the DefaultHighlighter will return fragments larger than the specified - // snippetLength. This has implications when appending and calculating ByteArrays, so we specify WORD. 
- optionsBuilder.boundaryScannerType(HighlightBuilder.BoundaryScannerType.WORD); optionsBuilder.noMatchSize(snippetCharLength); optionsBuilder.preTags(new String[] { "" }); optionsBuilder.postTags(new String[] { "" }); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 625df8768d6ef..65e9375458299 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -39,6 +39,12 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -51,8 +57,8 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator highlighters; private final FetchContext fetchContext; private final MappedFieldType fieldType; @@ -68,8 +74,8 @@ public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator fragmentLength) { + // TODO - This isn't a great solution, but in order to resolve character encoding issues in the + // returned BytesRef we need to ensure that the fragment size we return is equal to what was requested. + // Since the highlighter's default sentence boundary scanner can return longer fragments, we're truncating for now. 
+ byte[] truncatedBytes = truncateUtf8(highlightBytes, fragmentLength); + builder.appendBytesRef(new BytesRef(truncatedBytes)); + } else { + builder.appendBytesRef(new BytesRef(highlightBytes)); + } } if (multivalued) { builder.endPositionEntry(); @@ -140,6 +155,21 @@ protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d } } + private static byte[] truncateUtf8(byte[] bytes, int maxLength) throws CharacterCodingException { + if (bytes.length <= maxLength) return bytes; + + CharsetDecoder dec = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.IGNORE) + .onUnmappableCharacter(CodingErrorAction.IGNORE); + + CharBuffer chars = dec.decode(ByteBuffer.wrap(bytes, 0, maxLength)); + ByteBuffer out = StandardCharsets.UTF_8.encode(chars); + + byte[] result = new byte[out.remaining()]; + out.get(result); + return result; + } + private static Supplier lazyStoredSourceLoader(LeafReaderContext ctx, int doc) { return () -> { StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet()); From ae92c8325061d9a2a520ca2cb96055af910bc2a7 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 20 Aug 2025 15:27:49 -0400 Subject: [PATCH 39/44] Fix most extractSnippets CSV tests, add some more test cases --- .../HighlighterExpressionEvaluator.java | 6 +- .../extract-snippets-function.csv-spec | 84 ++++++++++++++----- 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java index 65e9375458299..6a788d541463e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/HighlighterExpressionEvaluator.java @@ -140,7 +140,8 @@ protected void 
appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int d for (Text highlightText : highlight.fragments()) { byte[] highlightBytes = highlightText.bytes().bytes(); if (highlightBytes.length > fragmentLength) { - // TODO - This isn't a great solution, but in order to resolve character encoding issues in the + // TODO - Figure out a better way to construct BytesRef + // This isn't a great solution, but in order to resolve character encoding issues in the // returned BytesRef we need to ensure that the fragment size we return is equal to what was requested. // Since the highlighter's default sentence boundary scanner can return longer fragments, we're truncating for now. byte[] truncatedBytes = truncateUtf8(highlightBytes, fragmentLength); @@ -163,7 +164,8 @@ private static byte[] truncateUtf8(byte[] bytes, int maxLength) throws Character .onUnmappableCharacter(CodingErrorAction.IGNORE); CharBuffer chars = dec.decode(ByteBuffer.wrap(bytes, 0, maxLength)); - ByteBuffer out = StandardCharsets.UTF_8.encode(chars); + String trimmed = chars.toString().trim(); + ByteBuffer out = StandardCharsets.UTF_8.encode(trimmed); byte[] result = new byte[out.remaining()]; out.get(result); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec index d512b1ce72a7e..d432b3c4da377 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/extract-snippets-function.csv-spec @@ -16,7 +16,7 @@ FROM books // tag::extract-snippets-with-field-result[] book_no:keyword | author:text | title:text | snippets:keyword -1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps the +1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th // end::extract-snippets-with-field-result[] ; @@ -25,18 +25,18 @@ 
required_capability: extract_snippets_function FROM books | WHERE MATCH(description, "hobbit") -| EVAL snippets = extract_snippets(description, "hobbit", 1, 25) +| EVAL snippets = extract_snippets(description, "hobbit", 1, 50) | KEEP book_no, author, title, snippets | SORT book_no | LIMIT 5 ; book_no:keyword | author:text | title:text | snippets:keyword -1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit -2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT -2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit -2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | the story begun in The Hobbit -2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | is accompanied by appropriate passage from The Hob +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | Tolkien, beloved author of THE HOBBIT. +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | This beautiful gift edition of The Hobbit, J.R.R. +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | Concluding the story begun in The Hobbit, this is +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | them all - which has fallen into the hands of the ; extractMultipleSnippetsWithMatch @@ -51,11 +51,11 @@ FROM books ; book_no:keyword | author:text | title:text | snippets:keyword -1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit -2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT -2675 | J.R.R. 
Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of The Hobbit, Tolkien's own children, The Hobbit] -2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Hobbit, , THE HOBBIT: AN UNEXPECTED, film adaptation of The Hobbit] -2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hobbit +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [Bilbo Baggins is a hobbit, beautiful gift edition of, Tolkien's own children, T] +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [the story begun in The Ho, , THE HOBBIT: AN UNEXPECT, film adaptation of The Ho] +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | into the hands of the hob ; @@ -67,16 +67,20 @@ FROM books | EVAL snippets = extract_snippets(description, "hobbit", 3, 25) | MV_EXPAND snippets | KEEP book_no, author, title, snippets -| SORT book_no -| LIMIT 5 +| SORT snippets +| LIMIT 9 ; -book_no:keyword | author:text | title:text | snippets:keyword -1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from The Hobbit -2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | beloved author of THE HOBBIT -2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of The Hobbit -2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, The Hobbit -2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit +book_no:keyword | author:text | title:text | snippets:keyword +2714 | J. R. R. 
Tolkien | Return of the King Being the Third Part of The Lord of the Rings | , THE HOBBIT: AN UNEXPECT +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Bilbo Baggins is a hobbit +6760 | J. R. R. Tolkien | Roverandom | By the author of The Hobb +7350 | [Christopher Tolkien, John Ronald Reuel Tolkien] | Return of the Shadow | The character of the hobb +4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's Hobbit poems in +4289 | J R R Tolkien | Poems from the Hobbit | Tolkien's acclaimed The H +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | Tolkien's own children, T +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | appropriate passage from +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | beautiful gift edition of ; extractMultipleSnippetsWithSomeNoMatches @@ -84,16 +88,52 @@ required_capability: extract_snippets_function FROM books | WHERE MATCH(author, "Faulkner") -| EVAL snippets = extract_snippets(description, "slavery", 1, 10) +| EVAL snippets = extract_snippets(description, "slavery", 1, 25) | KEEP book_no, author, title, snippets | SORT book_no | LIMIT 5 ; book_no:keyword | author:text | title:text | snippets:keyword -2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | abolition of slavery +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | Selected Letters of Lucretia Coffin Mott (Women in American History) | , and the abolition of sl 2713 | William Faulkner | Collected Stories of William Faulkner | null 2847 | Colleen Faulkner | To Love A Dark Stranger (Lovegram Historical Romance) | null 2883 | William Faulkner | A Summer of Faulkner: As I Lay Dying/The Sound and the Fury/Light in August (Oprah's Book Club) | null 3293 | Danny Faulkner | Universe by Design | null ; + +extractSnippetsWithDefaultNumSnippetsAndLength + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = 
extract_snippets(description, "hobbit") +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | of The Hob +2714 | J. R. R. Tolkien | Return of the King Being the Third Part of The Lord of the Rings | in The Hob +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob +; + +extractSnippetsWithDefaultLength + +FROM books +| WHERE MATCH(description, "hobbit") +| EVAL snippets = extract_snippets(description, "hobbit", 3) +| KEEP book_no, author, title, snippets +| SORT book_no +| LIMIT 5 +; + +book_no:keyword | author:text | title:text | snippets:keyword +1463 | J. R. R. Tolkien | Realms of Tolkien: Images of Middle-earth | from The H +2301 | John Ronald Reuel Tolkien | Smith of Wootton Major & Farmer Giles of Ham | of THE HOB +2675 | J.R.R. Tolkien | The Lord of the Rings - Boxed Set | [of The Hob, Baggins is, children,] +2714 | J. R. R. 
Tolkien | Return of the King Being the Third Part of The Lord of the Rings | [in The Hob, , THE HOBB, of The Hob] +2936 | John Ronald Reuel Tolkien | Fellowship of the Ring 2ND Edition | of the hob +; From 48c2825ad26e9dc93604c81b3e67ab5d388eb038 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 21 Aug 2025 10:14:57 -0400 Subject: [PATCH 40/44] Remove changes to AnalyzerTests --- .../xpack/esql/analysis/AnalyzerTests.java | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 72d08927e013e..ad2225d887942 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -148,7 +148,7 @@ import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; -@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug") +//@TestLogging(value = "org.elasticsearch.xpack.esql.analysis:TRACE", reason = "debug") public class AnalyzerTests extends ESTestCase { private static final UnresolvedRelation UNRESOLVED_RELATION = new UnresolvedRelation( @@ -2946,16 +2946,6 @@ public void testFromEnrichAndMatchColonUsage() { assertEquals(esRelation.indexPattern(), "test"); } - public void testSnippets() { - LogicalPlan plan = analyze(""" - from test - | EVAL x = extract_snippets(first_name, "text", 1, 10) - | KEEP x - """); - var limit = as(plan, Limit.class); - var filter = as(limit.child(), Filter.class); - } - public void testFunctionNamedParamsAsFunctionArgument() { LogicalPlan plan = analyze(""" from test From 80d105675f7ae8d1933a824b0934f88c932465fc Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 21 Aug 2025 10:16:22 -0400 Subject: [PATCH 41/44] Spotless --- 
.../org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index ad2225d887942..d72a97647c110 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -18,7 +18,6 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.logging.LogManager; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.LoadMapping; import org.elasticsearch.xpack.esql.VerificationException; From ec3ac7ae095d1fc08bae7261eb95b7bb2fdc792e Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 21 Aug 2025 10:25:19 -0400 Subject: [PATCH 42/44] Add preview = true --- .../esql/expression/function/scalar/string/ExtractSnippets.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index 159070ddf11c1..c3f924d8a2c4e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -82,6 +82,7 @@ public class ExtractSnippets extends EsqlScalarFunction @FunctionInfo( returnType = "keyword", + preview = true, description = """ Extracts the most relevant snippets to return from a given input string""", examples = @Example(file = "keyword", tag = 
"extract_snippets") From 694bf6a043aa2fb31e066d2508e0779853bc59b1 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 21 Aug 2025 14:00:16 -0400 Subject: [PATCH 43/44] Add ExtractSnippetTests and associated generated documentation --- .../functions/description/extract_snippets.md | 6 ++ .../functions/examples/extract_snippets.md | 18 ++++ .../functions/layout/extract_snippets.md | 23 +++++ .../functions/parameters/extract_snippets.md | 16 ++++ .../functions/types/extract_snippets.md | 9 ++ .../images/functions/extract_snippets.svg | 1 + .../functions/extract_snippets.json | 49 +++++++++++ .../kibana/docs/functions/extract_snippets.md | 9 ++ .../scalar/string/ExtractSnippets.java | 10 ++- .../xpack/esql/SerializationTestUtils.java | 2 + .../scalar/string/ExtractSnippetsTests.java | 85 +++++++++++++++++++ 11 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md create mode 100644 docs/reference/query-languages/esql/images/functions/extract_snippets.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md 
b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md new file mode 100644 index 0000000000000..d2368798306f1 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/extract_snippets.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Extracts the most relevant snippets to return from a given input string. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md new file mode 100644 index 0000000000000..741e7e43a74b4 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/extract_snippets.md @@ -0,0 +1,18 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```{applies_to} +stack: preview 9.2.0 +``` + +```esql +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +``` + +| book_no:keyword | author:text | title:text | snippets:keyword | +| --- | --- | --- | --- | +| 1211 | Fyodor Dostoevsky | The brothers Karamazov | achievement of perhaps th | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md new file mode 100644 index 0000000000000..69d7ee3b59f1b --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/extract_snippets.md @@ -0,0 +1,23 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +## `EXTRACT_SNIPPETS` [esql-extract_snippets] + +**Syntax** + +:::{image} ../../../images/functions/extract_snippets.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/extract_snippets.md +::: + +:::{include} ../description/extract_snippets.md +::: + +:::{include} ../types/extract_snippets.md +::: + +:::{include} ../examples/extract_snippets.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md new file mode 100644 index 0000000000000..8c5cea74e8512 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/extract_snippets.md @@ -0,0 +1,16 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`field` +: The input string + +`str` +: The input string + +`num_snippets` +: The number of snippets to return. Defaults to 1 + +`snippet_length` +: The length of snippets to return. Defaults to 10 + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md new file mode 100644 index 0000000000000..2072f7d99abad --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/extract_snippets.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Supported types** + +| field | str | num_snippets | snippet_length | result | +| --- | --- | --- | --- | --- | +| keyword | keyword | | | keyword | +| text | keyword | | | keyword | + diff --git a/docs/reference/query-languages/esql/images/functions/extract_snippets.svg b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg new file mode 100644 index 0000000000000..c17eff787d563 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/extract_snippets.svg @@ -0,0 +1 @@ +EXTRACT_SNIPPETS(field,str,num_snippets,snippet_length) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json new file mode 100644 index 0000000000000..e1c0b90fb237b --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/extract_snippets.json @@ -0,0 +1,49 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. 
See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "extract_snippets", + "description" : "Extracts the most relevant snippets to return from a given input string.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + }, + { + "name" : "str", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "The input string" + }, + { + "name" : "str", + "type" : "keyword", + "optional" : false, + "description" : "The input string" + } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "examples" : [ + "FROM books\n| EVAL snippets = extract_snippets(description, \"crowning achievement\", 1, 25)" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md new file mode 100644 index 0000000000000..b7865446d397f --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/extract_snippets.md @@ -0,0 +1,9 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### EXTRACT SNIPPETS +Extracts the most relevant snippets to return from a given input string. 
+ +```esql +FROM books +| EVAL snippets = extract_snippets(description, "crowning achievement", 1, 25) +``` diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java index c3f924d8a2c4e..6eb531356de4b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippets.java @@ -84,13 +84,14 @@ public class ExtractSnippets extends EsqlScalarFunction returnType = "keyword", preview = true, description = """ - Extracts the most relevant snippets to return from a given input string""", - examples = @Example(file = "keyword", tag = "extract_snippets") + Extracts the most relevant snippets to return from a given input string.""", + examples = { + @Example(file = "extract-snippets-function", tag = "extract-snippets-with-field", applies_to = "stack: preview 9.2.0") } ) public ExtractSnippets( Source source, - @Param(name = "field", type = { "keyword" }, description = "The input string") Expression field, - @Param(name = "str", type = { "keyword", "text" }, description = "The input string") Expression str, + @Param(name = "field", type = { "keyword", "text" }, description = "The input string") Expression field, + @Param(name = "str", type = { "keyword" }, description = "The input string") Expression str, @Param( optional = true, name = "num_snippets", @@ -105,6 +106,7 @@ public ExtractSnippets( ) Expression snippetLength ) { this(source, field, str, numSnippets, snippetLength, new MatchQueryBuilder(field.sourceText(), str.sourceText())); + } public ExtractSnippets( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java index e55a1b039258e..c87cc11306b13 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java @@ -18,6 +18,7 @@ import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.ExistsQueryBuilder; import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.RegexpQueryBuilder; @@ -113,6 +114,7 @@ public static NamedWriteableRegistry writableRegistry() { entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, RegexpQueryBuilder.NAME, RegexpQueryBuilder::new)); entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, ExistsQueryBuilder.NAME, ExistsQueryBuilder::new)); entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, KnnVectorQueryBuilder.NAME, KnnVectorQueryBuilder::new)); + entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, MatchQueryBuilder.NAME, MatchQueryBuilder::new)); entries.add(SingleValueQuery.ENTRY); entries.addAll(ExpressionWritables.getNamedWriteables()); entries.addAll(PlanWritables.getNamedWriteables()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java new file mode 100644 index 0000000000000..da5e85a0dff98 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ExtractSnippetsTests.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.stringCases; +import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER; +import static org.hamcrest.Matchers.equalTo; + +@FunctionName("extract_snippets") +public class ExtractSnippetsTests extends AbstractFunctionTestCase { + + public ExtractSnippetsTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + return parameterSuppliersFromTypedData(testCaseSuppliers()); + } + + private static List testCaseSuppliers() { + List suppliers = new ArrayList<>(); + addStringTestCases(suppliers); + return suppliers; + } + + public static void addStringTestCases(List suppliers) { + for (DataType fieldType : DataType.stringTypes()) { + if
(DataType.UNDER_CONSTRUCTION.containsKey(fieldType)) { /* skip string types still under construction */ + continue; + } + for (TestCaseSupplier.TypedDataSupplier queryDataSupplier : stringCases(fieldType)) { + suppliers.add( + TestCaseSupplier.testCaseSupplier( + queryDataSupplier, + new TestCaseSupplier.TypedDataSupplier(fieldType.typeName(), () -> randomAlphaOfLength(10), DataType.KEYWORD), + (d1, d2) -> equalTo("string"), + DataType.KEYWORD, + (o1, o2) -> true /* NOTE(review): constant-true lambda looks like a placeholder; confirm intent */ + ) + ); + } + } + } + + @Override + protected Expression build(Source source, List args) { + ExtractSnippets extractSnippets = new ExtractSnippets( + source, + args.get(0), + args.get(1), + args.size() > 2 ? args.get(2) : null, + args.size() > 3 ? args.get(3) : null + ); + // We need to add the QueryBuilder to the extract_snippets expression, as it is used to implement equals() and hashCode() and + // thus test the serialization methods. But we can only do this if the parameters make sense. + if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) { + QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, extractSnippets).toQueryBuilder(); + extractSnippets.replaceQueryBuilder(queryBuilder); + } + return extractSnippets; + } +} From 0ef8fce1a00030331614960aa961d602aae96451 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 21 Aug 2025 16:07:07 -0400 Subject: [PATCH 44/44] Add integration test for extract_snippets --- .../plugin/ExtractSnippetsFunctionIT.java | 193 ++++++++++++++++++ .../xpack/esql/plugin/ExtractSnippetsIT.java | 74 ------- 2 files changed, 193 insertions(+), 74 deletions(-) create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java delete mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java new file mode 100644 index 0000000000000..d4a99d18d63ef --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsFunctionIT.java @@ -0,0 +1,193 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.client.internal.IndicesAdminClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.junit.Before; + +import java.util.Collections; +import java.util.List; +import java.util.function.Consumer; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class ExtractSnippetsFunctionIT extends AbstractEsqlIntegTestCase { + + private static final List EMPTY_RESULT = Collections.singletonList(null); /* expected row when my_snippet is null */ + + @Before + public void setupIndex() { + createAndPopulateIndex(this::ensureYellow); + } + + public void testExtractSnippets() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of(List.of("The quick brown"), List.of("This is a brown"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) + ); + } + } + +
public void testExtractMultipleSnippets() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of( + List.of(List.of("The quick brown", "Afterward, the")), + List.of(List.of("This is a brown", "Sometimes the b")), + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT + ) + ); + } + } + + public void testExtractSnippetsWithMatch() { + var query = """ + FROM test METADATA _score + | WHERE MATCH(content, "fox") + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues(resp.values(), List.of(List.of("The quick brown"), List.of("This is a brown"))); + } + } + + public void testExtractMultipleSnippetsWithMatch() { + var query = """ + FROM test METADATA _score + | WHERE MATCH(content, "fox") + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of(List.of(List.of("The quick brown", "Afterward, the")), List.of(List.of("This is a brown", "Sometimes the b"))) + ); + } + } + + public void testExtractSnippetDefaults() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox") + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), +
List.of(List.of("is a brown"), List.of("quick brow"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) + ); + } + } + + public void testExtractSnippetDefaultLength() { + var query = """ + FROM test + | EVAL my_snippet = extract_snippets(content, "fox", 3) + | SORT my_snippet + | KEEP my_snippet + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("my_snippet")); + assertColumnTypes(resp.columns(), List.of("keyword")); + assertValues( + resp.values(), + List.of( + List.of(List.of("is a brown", "the brown")), + List.of(List.of("quick brow", "the brown")), + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT, + EMPTY_RESULT + ) + ); + } + } + + static void createAndPopulateIndex(Consumer ensureYellow) { + var indexName = "test"; + var client = client().admin().indices(); + var createRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(createRequest); + client().prepareBulk().add(new IndexRequest(indexName).id("1").source("id", 1, "content", """ + This is a brown fox that likes to run through the meadow. + Sometimes the brown fox pauses to look around before continuing. + """)).add(new IndexRequest(indexName).id("2").source("id", 2, "content", """ + This is a brown dog that spends most of the day sleeping in the yard. + The brown dog occasionally wakes up to bark at the mailman. + """)).add(new IndexRequest(indexName).id("3").source("id", 3, "content", """ + This dog is really brown and enjoys chasing sticks near the river. + People often comment on how brown the dog looks in the sunlight. + """)).add(new IndexRequest(indexName).id("4").source("id", 4, "content", """ + The quick brown fox jumps over the lazy dog whenever it feels playful. + Afterward, the brown fox runs off into the forest.
+ """)).add(new IndexRequest(indexName).id("5").source("id", 5, "content", """ + There is also a white cat that prefers to sit quietly by the window. + Unlike the other animals, the white cat ignores everything around it. + """)).add(new IndexRequest(indexName).id("6").source("id", 6, "content", """ + The dog is brown but this document is very very long, filled with many words describing the scene. + Even so, the brown dog is still the main focus of the story. + """)).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); + + var lookupIndexName = "test_lookup"; + createAndPopulateLookupIndex(client, lookupIndexName); /* NOTE(review): index is created but never populated */ + + ensureYellow.accept(new String[] { indexName, lookupIndexName }); + } + + static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) { + var createRequest = client.prepareCreate(lookupIndexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup")) + .setMapping("id", "type=integer", "lookup_content", "type=text"); + assertAcked(createRequest); + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java deleted file mode 100644 index 1637c2476bd38..0000000000000 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/ExtractSnippetsIT.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0.
- */ - -package org.elasticsearch.xpack.esql.plugin; - -import org.elasticsearch.action.index.IndexRequest; -import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.client.internal.IndicesAdminClient; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; -import org.junit.Before; - -import java.util.List; -import java.util.function.Consumer; - -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; - -//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") -public class ExtractSnippetsIT extends AbstractEsqlIntegTestCase { - - @Before - public void setupIndex() { - createAndPopulateIndex(this::ensureYellow); - } - - public void testExtractSnippets() { - var query = """ - FROM test - | EVAL x = extract_snippets(content, "fox", 1, 10) - | SORT x - | KEEP x - """; - - try (var resp = run(query)) { - assertColumnNames(resp.columns(), List.of("x")); - assertColumnTypes(resp.columns(), List.of("integer")); - assertValues(resp.values(), List.of(List.of(1), List.of(6))); - } - } - - static void createAndPopulateIndex(Consumer ensureYellow) { - var indexName = "test"; - var client = client().admin().indices(); - var createRequest = client.prepareCreate(indexName) - .setSettings(Settings.builder().put("index.number_of_shards", 1)) - .setMapping("id", "type=integer", "content", "type=text"); - assertAcked(createRequest); - client().prepareBulk() - .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox")) - .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog")) - .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown")) - .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long")) - .add(new 
IndexRequest(indexName).id("5").source("id", 5, "content", "There is also a white cat")) - .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog")) - .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) - .get(); - - var lookupIndexName = "test_lookup"; - createAndPopulateLookupIndex(client, lookupIndexName); - - ensureYellow.accept(new String[] { indexName, lookupIndexName }); - } - - static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) { - var createRequest = client.prepareCreate(lookupIndexName) - .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup")) - .setMapping("id", "type=integer", "lookup_content", "type=text"); - assertAcked(createRequest); - } -}