From 79b7e7201de5a45f97a0ad644f70010792d0bd13 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 26 Aug 2025 09:31:10 -0400 Subject: [PATCH 01/24] Instead of generating snippets via highlighter, chunk and score chunks in text similarity reranker --- .../core/src/main/java/module-info.java | 1 + .../core/common/snippets/SnippetScorer.java | 133 ++++++++++++++++++ .../common/snippets/SnippetScorerTests.java | 56 ++++++++ .../TextSimilarityRankBuilder.java | 2 +- ...nkingRankFeaturePhaseRankShardContext.java | 87 +++++------- .../70_text_similarity_rank_retriever.yml | 37 ----- 6 files changed, 228 insertions(+), 88 deletions(-) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java diff --git a/x-pack/plugin/core/src/main/java/module-info.java b/x-pack/plugin/core/src/main/java/module-info.java index 3d28eff88a24a..a520a7a7abfc6 100644 --- a/x-pack/plugin/core/src/main/java/module-info.java +++ b/x-pack/plugin/core/src/main/java/module-info.java @@ -231,6 +231,7 @@ exports org.elasticsearch.xpack.core.watcher.watch; exports org.elasticsearch.xpack.core.watcher; exports org.elasticsearch.xpack.core.security.authc.apikey; + exports org.elasticsearch.xpack.core.common.snippets; provides org.elasticsearch.action.admin.cluster.node.info.ComponentVersionNumber with diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java new file mode 100644 index 0000000000000..e6a94eaf5e864 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java @@ -0,0 +1,133 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.common.snippets; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility class for scoring snippets using an in-memory Lucene index. + */ +public class SnippetScorer { + + private static final String CONTENT_FIELD = "content"; + + private final StandardAnalyzer analyzer; + + public SnippetScorer() { + this.analyzer = new StandardAnalyzer(); + } + + /** + * Creates an in-memory index of the snippets and returns an ordered, scored list. 
+ * + * @param snippets the list of text snippets to score + * @param inferenceText the query text to compare against + * @param maxResults maximum number of results to return + * @return list of scored snippets ordered by relevance + * @throws IOException on failure scoring snippets + */ + public List scoreSnippets(List snippets, String inferenceText, int maxResults) throws IOException { + if (snippets == null || snippets.isEmpty() || inferenceText == null || inferenceText.trim().isEmpty()) { + return new ArrayList<>(); + } + + try (Directory directory = new ByteBuffersDirectory()) { + IndexWriterConfig config = new IndexWriterConfig(analyzer); + try (IndexWriter writer = new IndexWriter(directory, config)) { + for (int i = 0; i < snippets.size(); i++) { + Document doc = new Document(); + doc.add(new TextField(CONTENT_FIELD, snippets.get(i), Field.Store.YES)); + writer.addDocument(doc); + } + writer.commit(); + } + + try (DirectoryReader reader = DirectoryReader.open(directory)) { + IndexSearcher searcher = new IndexSearcher(reader); + + Query query = createQuery(inferenceText); + int numResults = Math.min(maxResults, snippets.size()); + TopDocs topDocs = searcher.search(query, numResults); + + List scoredSnippets = new ArrayList<>(); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + Document doc = reader.storedFields().document(scoreDoc.doc); + String content = doc.get(CONTENT_FIELD); + scoredSnippets.add(new ScoredSnippet(content, scoreDoc.score)); + } + + return scoredSnippets; + } + } + } + + /** + * Creates a Lucene query from the inference text. + * This method creates a boolean query with terms from the inference text. 
+ */ + private Query createQuery(String inferenceText) throws IOException { + String[] tokens = tokenizeText(inferenceText); + + if (tokens.length == 0) { + throw new IllegalArgumentException("Inference text must contain at least one valid token"); + } else if (tokens.length == 1) { + return new TermQuery(new Term(CONTENT_FIELD, tokens[0])); + } else { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (String token : tokens) { + if (token != null && token.trim().isEmpty() == false) { + builder.add(new TermQuery(new Term(CONTENT_FIELD, token)), BooleanClause.Occur.SHOULD); + } + } + return builder.build(); + } + } + + /** + * Tokenizes the input text using the analyzer + */ + private String[] tokenizeText(String text) throws IOException { + List tokens = new ArrayList<>(); + try (org.apache.lucene.analysis.TokenStream tokenStream = analyzer.tokenStream(CONTENT_FIELD, text)) { + org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAttribute = tokenStream.addAttribute( + org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class + ); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + tokens.add(termAttribute.toString()); + } + tokenStream.end(); + } + return tokens.toArray(new String[0]); + } + + /** + * Represents a snippet with its relevance score and original position. + */ + public record ScoredSnippet(String content, float score) {} +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java new file mode 100644 index 0000000000000..e4fc3ad642f6d --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.common.snippets; + +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.Matchers.greaterThan; + +public class SnippetScorerTests extends ESTestCase { + + public void testScoreSnippets() throws IOException { + SnippetScorer scorer = new SnippetScorer(); + + List snippets = Arrays.asList( + "Cats like to sleep all day and play with mice", + "Dogs are loyal companions and great pets", + "The weather today is very sunny and warm", + "Dogs love to play with toys and go for walks", + "Elasticsearch is a great search engine" + ); + + String inferenceText = "dogs play walk"; + int maxResults = 3; + + List scoredSnippets = scorer.scoreSnippets(snippets, inferenceText, maxResults); + + assertEquals(maxResults, scoredSnippets.size()); + + // The snippets about dogs should score highest, followed by the snippet about cats + SnippetScorer.ScoredSnippet snippet = scoredSnippets.getFirst(); + assertTrue(snippet.content().equalsIgnoreCase("Dogs love to play with toys and go for walks")); + assertThat(snippet.score(), greaterThan(0f)); + + snippet = scoredSnippets.get(1); + assertTrue(snippet.content().equalsIgnoreCase("Dogs are loyal companions and great pets")); + assertThat(snippet.score(), greaterThan(0f)); + + snippet = scoredSnippets.get(2); + assertTrue(snippet.content().equalsIgnoreCase("Cats like to sleep all day and play with mice")); + assertThat(snippet.score(), greaterThan(0f)); + + // Scores should be in descending order + for (int i = 1; i < scoredSnippets.size(); i++) { + assertTrue(scoredSnippets.get(i - 1).score() >= scoredSnippets.get(i).score()); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java index 6e213c5906b23..243a9a0d2d565 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java @@ -50,7 +50,7 @@ public class TextSimilarityRankBuilder extends RankBuilder { /** * The default token size limit of the Elastic reranker is 512. */ - private static final int DEFAULT_TOKEN_SIZE_LIMIT = 512; + public static final int DEFAULT_TOKEN_SIZE_LIMIT = 512; public static final LicensedFeature.Momentary TEXT_SIMILARITY_RERANKER_FEATURE = LicensedFeature.momentary( null, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index 66fb4a366a757..898aaca833874 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -8,38 +8,39 @@ package org.elasticsearch.xpack.inference.rank.textsimilarity; import org.elasticsearch.common.document.DocumentField; -import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; -import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; -import 
org.elasticsearch.search.fetch.subphase.highlight.HighlightField; -import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; -import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.rank.RankShardResult; import org.elasticsearch.search.rank.feature.RankFeatureDoc; import org.elasticsearch.search.rank.feature.RankFeatureShardResult; import org.elasticsearch.search.rank.rerank.RerankingRankFeaturePhaseRankShardContext; -import org.elasticsearch.xcontent.Text; +import org.elasticsearch.xpack.core.common.snippets.SnippetScorer; +import org.elasticsearch.xpack.inference.chunking.Chunker; +import org.elasticsearch.xpack.inference.chunking.ChunkerBuilder; +import org.elasticsearch.xpack.inference.chunking.SentenceBoundaryChunkingSettings; import java.io.IOException; -import java.util.Arrays; import java.util.List; -import java.util.Map; import static org.elasticsearch.xpack.inference.rank.textsimilarity.SnippetConfig.DEFAULT_NUM_SNIPPETS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankBuilder.DEFAULT_TOKEN_SIZE_LIMIT; public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends RerankingRankFeaturePhaseRankShardContext { - private final SnippetConfig snippetRankInput; + private final ChunkingSettings DEFAULT_CHUNKING_SETTINGS = new SentenceBoundaryChunkingSettings(DEFAULT_TOKEN_SIZE_LIMIT, 0); - // Rough approximation of token size vs. characters in highlight fragments. 
- // TODO: highlighter should be able to set fragment size by token not length - private static final int TOKEN_SIZE_LIMIT_MULTIPLIER = 5; + private final SnippetConfig snippetRankInput; + private final ChunkingSettings chunkingSettings; + private final Chunker chunker; public TextSimilarityRerankingRankFeaturePhaseRankShardContext(String field, @Nullable SnippetConfig snippetRankInput) { super(field); this.snippetRankInput = snippetRankInput; + // TODO allow customization through snippetRankInput + chunkingSettings = DEFAULT_CHUNKING_SETTINGS; + chunker = ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy()); } @Override @@ -49,49 +50,35 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI rankFeatureDocs[i] = new RankFeatureDoc(hits.getHits()[i].docId(), hits.getHits()[i].getScore(), shardId); SearchHit hit = hits.getHits()[i]; DocumentField docField = hit.field(field); - if (snippetRankInput == null && docField != null) { - rankFeatureDocs[i].featureData(List.of(docField.getValue().toString())); - } else { - Map highlightFields = hit.getHighlightFields(); - if (highlightFields != null && highlightFields.containsKey(field) && highlightFields.get(field).fragments().length > 0) { - List snippets = Arrays.stream(highlightFields.get(field).fragments()).map(Text::string).toList(); - rankFeatureDocs[i].featureData(snippets); - } else if (docField != null) { - // If we did not get highlighting results, backfill with the doc field value - // but pass in a warning because we are not reranking on snippets only + if (docField != null) { + if (snippetRankInput != null) { + int numSnippets = snippetRankInput.numSnippets() != null ? 
snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS; + List chunkOffsets = chunker.chunk(docField.getValue().toString(), chunkingSettings); + List chunks = chunkOffsets.stream() + .map(offset -> { return docField.getValue().toString().substring(offset.start(), offset.end()); }) + .toList(); + + List bestChunks; + try { + SnippetScorer scorer = new SnippetScorer(); + List scoredSnippets = scorer.scoreSnippets( + chunks, + snippetRankInput.inferenceText(), + numSnippets + ); + bestChunks = scoredSnippets.stream().map(SnippetScorer.ScoredSnippet::content).limit(numSnippets).toList(); + } catch (IOException e) { + // TODO - Should this throw, or truncate/send a warning header? + throw new IllegalStateException("Could not generate snippets for input to reranker", e); + } + rankFeatureDocs[i].featureData(bestChunks); + + } else { rankFeatureDocs[i].featureData(List.of(docField.getValue().toString())); - HeaderWarning.addWarning( - "Reranking on snippets requested, but no snippets were found for field [" + field + "]. Using field value instead." - ); } } } return new RankFeatureShardResult(rankFeatureDocs); } - @Override - public void prepareForFetch(SearchContext context) { - if (snippetRankInput != null) { - try { - HighlightBuilder highlightBuilder = new HighlightBuilder(); - highlightBuilder.highlightQuery(snippetRankInput.snippetQueryBuilder()); - // Stripping pre/post tags as they're not useful for snippet creation - highlightBuilder.field(field).preTags("").postTags(""); - // Return highest scoring fragments - highlightBuilder.order(HighlightBuilder.Order.SCORE); - int numSnippets = snippetRankInput.numSnippets() != null ? 
snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS; - highlightBuilder.numOfFragments(numSnippets); - // Rely on the model to determine the fragment size - int tokenSizeLimit = snippetRankInput.tokenSizeLimit(); - int fragmentSize = tokenSizeLimit * TOKEN_SIZE_LIMIT_MULTIPLIER; - highlightBuilder.fragmentSize(fragmentSize); - highlightBuilder.noMatchSize(fragmentSize); - SearchHighlightContext searchHighlightContext = highlightBuilder.build(context.getSearchExecutionContext()); - context.highlight(searchHighlightContext); - } catch (IOException e) { - throw new RuntimeException("Failed to generate snippet request", e); - } - } - } - } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 3dd85ef9e8658..2553ee2c9732b 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -680,40 +680,3 @@ setup: - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - ---- -"Reranking based on snippets when highlighter doesn't return results": - - - requires: - test_runner_features: allowed_warnings - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 - - - do: - allowed_warnings: - - "Reranking on snippets requested, but no snippets were found for field [inference_text_field]. Using field value instead." - search: - index: test-index - body: - track_total_hits: true - fields: [ "text", "topic" ] - retriever: - text_similarity_reranker: - retriever: - standard: - query: - term: - topic: "science" - rank_window_size: 10 - inference_id: my-rerank-model - inference_text: "How often does the moon hide the sun?" 
- field: inference_text_field - snippets: - num_snippets: 2 - size: 10 - - - match: { hits.total.value: 2 } - - length: { hits.hits: 2 } - - - match: { hits.hits.0._id: "doc_2" } - - match: { hits.hits.1._id: "doc_1" } From 9f28c0808ce4801ceec2aac5640c4b5d2fee5b7d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 26 Aug 2025 18:23:33 +0000 Subject: [PATCH 02/24] [CI] Auto commit changes from spotless --- .../xpack/core/common/snippets/SnippetScorer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java index e6a94eaf5e864..ec517142ea881 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java @@ -14,13 +14,13 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; From 49d25a79374c30039ea7cca786acc2c443fbff32 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 27 Aug 2025 08:19:40 -0400 Subject: [PATCH 03/24] Add customization based on preferred chunking settings or chunk size for syntactic sugar --- .../inference/ChunkingSettings.java | 2 + 
.../core/common/snippets/SnippetScorer.java | 10 +- .../SentenceBoundaryChunkingSettings.java | 27 ++++ .../WordBoundaryChunkingSettings.java | 27 ++++ .../rank/textsimilarity/SnippetConfig.java | 58 +++++--- .../TextSimilarityRankBuilder.java | 62 +------- .../TextSimilarityRankRetrieverBuilder.java | 20 ++- ...nkingRankFeaturePhaseRankShardContext.java | 10 +- ...aturePhaseRankCoordinatorContextTests.java | 2 +- .../70_text_similarity_rank_retriever.yml | 140 ++++++++++++++++++ 10 files changed, 254 insertions(+), 104 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java b/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java index 34b3e5a6d58ee..fb0bbca33aa7d 100644 --- a/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java +++ b/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java @@ -18,4 +18,6 @@ public interface ChunkingSettings extends ToXContentObject, VersionedNamedWritea ChunkingStrategy getChunkingStrategy(); Map asMap(); + + default void validate() {} } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java index ec517142ea881..fa110eda597ea 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java @@ -59,9 +59,9 @@ public List scoreSnippets(List snippets, String inference try (Directory directory = new ByteBuffersDirectory()) { IndexWriterConfig config = new IndexWriterConfig(analyzer); try (IndexWriter writer = new IndexWriter(directory, config)) { - for (int i = 0; i < snippets.size(); i++) { + for (String snippet : snippets) { Document doc = new Document(); - doc.add(new TextField(CONTENT_FIELD, snippets.get(i), Field.Store.YES)); + doc.add(new 
TextField(CONTENT_FIELD, snippet, Field.Store.YES)); writer.addDocument(doc); } writer.commit(); @@ -88,7 +88,6 @@ public List scoreSnippets(List snippets, String inference /** * Creates a Lucene query from the inference text. - * This method creates a boolean query with terms from the inference text. */ private Query createQuery(String inferenceText) throws IOException { String[] tokens = tokenizeText(inferenceText); @@ -108,9 +107,6 @@ private Query createQuery(String inferenceText) throws IOException { } } - /** - * Tokenizes the input text using the analyzer - */ private String[] tokenizeText(String text) throws IOException { List tokens = new ArrayList<>(); try (org.apache.lucene.analysis.TokenStream tokenStream = analyzer.tokenStream(CONTENT_FIELD, text)) { @@ -127,7 +123,7 @@ private String[] tokenizeText(String text) throws IOException { } /** - * Represents a snippet with its relevance score and original position. + * Represents a snippet with its relevance score. */ public record ScoredSnippet(String content, float score) {} } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java index b87e164089d31..160259f88240b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java @@ -55,6 +55,33 @@ public SentenceBoundaryChunkingSettings(StreamInput in) throws IOException { } } + @Override + public void validate() { + ValidationException validationException = new ValidationException(); + + if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) { + validationException.addValidationError( + ChunkingSettingsOptions.MAX_CHUNK_SIZE + + "[" + 
+ maxChunkSize + + "] must be between " + + MAX_CHUNK_SIZE_LOWER_LIMIT + + " and " + + MAX_CHUNK_SIZE_UPPER_LIMIT + ); + } + + if (sentenceOverlap > 1 || sentenceOverlap < 0) { + validationException.addValidationError( + ChunkingSettingsOptions.SENTENCE_OVERLAP + "[" + sentenceOverlap + "] must be either 0 or 1" + ); + } + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + } + @Override public Map asMap() { return Map.of( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java index 97f8aa49ef4d1..46c4a185e8732 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java @@ -49,6 +49,33 @@ public WordBoundaryChunkingSettings(StreamInput in) throws IOException { overlap = in.readInt(); } + @Override + public void validate() { + ValidationException validationException = new ValidationException(); + + if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) { + validationException.addValidationError( + ChunkingSettingsOptions.MAX_CHUNK_SIZE + + "[" + + maxChunkSize + + "] must be between " + + MAX_CHUNK_SIZE_LOWER_LIMIT + + " and " + + MAX_CHUNK_SIZE_UPPER_LIMIT + ); + } + + if (overlap > maxChunkSize / 2) { + validationException.addValidationError( + ChunkingSettingsOptions.OVERLAP + "[" + overlap + "] must be less than or equal to half of max chunk size" + ); + } + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + } + @Override public Map asMap() { return Map.of( diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java index f25ee40ca7ab1..aa9f953ca31df 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java @@ -10,48 +10,65 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.inference.ChunkingSettings; +import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder; +import org.elasticsearch.xpack.inference.chunking.SentenceBoundaryChunkingSettings; import java.io.IOException; +import java.util.Map; import java.util.Objects; public class SnippetConfig implements Writeable { public final Integer numSnippets; private final String inferenceText; - private final Integer tokenSizeLimit; - public final QueryBuilder snippetQueryBuilder; + private final ChunkingSettings chunkingSettings; + public static final int DEFAULT_CHUNK_SIZE = 300; public static final int DEFAULT_NUM_SNIPPETS = 1; + public static ChunkingSettings createChunkingSettings(Integer chunkSize) { + int chunkSizeOrDefault = chunkSize != null ? 
chunkSize : DEFAULT_CHUNK_SIZE; + ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(chunkSizeOrDefault, 0); + chunkingSettings.validate(); + return chunkingSettings; + } + public SnippetConfig(StreamInput in) throws IOException { this.numSnippets = in.readOptionalVInt(); this.inferenceText = in.readString(); - this.tokenSizeLimit = in.readOptionalVInt(); - this.snippetQueryBuilder = in.readOptionalNamedWriteable(QueryBuilder.class); + Map chunkingSettingsMap = in.readGenericMap(); + this.chunkingSettings = ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); + } + + public SnippetConfig(Integer numSnippets, ChunkingSettings chunkingSettings, Integer chunkSize) { + this(numSnippets, null, chunkingSettings, chunkSize); } - public SnippetConfig(Integer numSnippets) { - this(numSnippets, null, null); + public SnippetConfig(Integer numSnippets, String inferenceText, Integer chunkSize) { + this(numSnippets, inferenceText, null, chunkSize); } - public SnippetConfig(Integer numSnippets, String inferenceText, Integer tokenSizeLimit) { - this(numSnippets, inferenceText, tokenSizeLimit, null); + public SnippetConfig(Integer numSnippets, String inferenceText, ChunkingSettings chunkingSettings) { + this(numSnippets, inferenceText, chunkingSettings, null); } - public SnippetConfig(Integer numSnippets, String inferenceText, Integer tokenSizeLimit, QueryBuilder snippetQueryBuilder) { + public SnippetConfig(Integer numSnippets, String inferenceText, ChunkingSettings chunkingSettings, Integer chunkSize) { + + if (chunkingSettings != null && chunkSize != null) { + throw new IllegalArgumentException("Only one of chunking_settings or chunk_size may be provided"); + } + this.numSnippets = numSnippets; this.inferenceText = inferenceText; - this.tokenSizeLimit = tokenSizeLimit; - this.snippetQueryBuilder = snippetQueryBuilder; + this.chunkingSettings = chunkingSettings != null ? 
chunkingSettings : createChunkingSettings(chunkSize); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalVInt(numSnippets); out.writeString(inferenceText); - out.writeOptionalVInt(tokenSizeLimit); - out.writeOptionalNamedWriteable(snippetQueryBuilder); + out.writeGenericMap(chunkingSettings.asMap()); } public Integer numSnippets() { @@ -62,12 +79,8 @@ public String inferenceText() { return inferenceText; } - public Integer tokenSizeLimit() { - return tokenSizeLimit; - } - - public QueryBuilder snippetQueryBuilder() { - return snippetQueryBuilder; + public ChunkingSettings chunkingSettings() { + return chunkingSettings; } @Override @@ -77,12 +90,11 @@ public boolean equals(Object o) { SnippetConfig that = (SnippetConfig) o; return Objects.equals(numSnippets, that.numSnippets) && Objects.equals(inferenceText, that.inferenceText) - && Objects.equals(tokenSizeLimit, that.tokenSizeLimit) - && Objects.equals(snippetQueryBuilder, that.snippetQueryBuilder); + && Objects.equals(chunkingSettings, that.chunkingSettings); } @Override public int hashCode() { - return Objects.hash(numSnippets, inferenceText, tokenSizeLimit, snippetQueryBuilder); + return Objects.hash(numSnippets, inferenceText, chunkingSettings); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java index 243a9a0d2d565..52543c8ad3a60 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java @@ -15,9 +15,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import 
org.elasticsearch.index.query.MatchQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.license.License; import org.elasticsearch.license.LicensedFeature; import org.elasticsearch.search.rank.RankBuilder; @@ -47,11 +44,6 @@ public class TextSimilarityRankBuilder extends RankBuilder { public static final String NAME = "text_similarity_reranker"; - /** - * The default token size limit of the Elastic reranker is 512. - */ - public static final int DEFAULT_TOKEN_SIZE_LIMIT = 512; - public static final LicensedFeature.Momentary TEXT_SIMILARITY_RERANKER_FEATURE = LicensedFeature.momentary( null, "text-similarity-reranker", @@ -147,50 +139,6 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio } } - @Override - public RankBuilder rewrite(QueryRewriteContext queryRewriteContext) throws IOException { - TextSimilarityRankBuilder rewritten = this; - if (snippetConfig != null) { - QueryBuilder snippetQueryBuilder = snippetConfig.snippetQueryBuilder(); - if (snippetQueryBuilder == null) { - rewritten = new TextSimilarityRankBuilder( - field, - inferenceId, - inferenceText, - rankWindowSize(), - minScore, - failuresAllowed, - new SnippetConfig( - snippetConfig.numSnippets(), - snippetConfig.inferenceText(), - snippetConfig.tokenSizeLimit(), - new MatchQueryBuilder(field, inferenceText) - ) - ); - } else { - QueryBuilder rewrittenSnippetQueryBuilder = snippetQueryBuilder.rewrite(queryRewriteContext); - if (snippetQueryBuilder != rewrittenSnippetQueryBuilder) { - rewritten = new TextSimilarityRankBuilder( - field, - inferenceId, - inferenceText, - rankWindowSize(), - minScore, - failuresAllowed, - new SnippetConfig( - snippetConfig.numSnippets(), - snippetConfig.inferenceText(), - snippetConfig.tokenSizeLimit(), - rewrittenSnippetQueryBuilder - ) - ); - } - } - } - - return rewritten; - } - @Override public boolean isCompoundBuilder() { return false; @@ 
-249,18 +197,10 @@ public RankFeaturePhaseRankCoordinatorContext buildRankFeaturePhaseCoordinatorCo inferenceText, minScore, failuresAllowed, - snippetConfig != null ? new SnippetConfig(snippetConfig.numSnippets, inferenceText, tokenSizeLimit(inferenceId)) : null + snippetConfig != null ? new SnippetConfig(snippetConfig.numSnippets, inferenceText, snippetConfig.chunkingSettings()) : null ); } - /** - * @return The token size limit to apply to this rerank context. - * TODO: This should be pulled from the inference endpoint when available, not hardcoded. - */ - public static Integer tokenSizeLimit(String inferenceId) { - return DEFAULT_TOKEN_SIZE_LIMIT; - } - public String field() { return field; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 18bbbd8a2c134..630560131efb8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.search.builder.SearchSourceBuilder; @@ -22,10 +23,12 @@ import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder; import java.io.IOException; import java.util.ArrayList; import 
java.util.List; +import java.util.Map; import java.util.Objects; import static org.elasticsearch.search.rank.RankBuilder.DEFAULT_RANK_WINDOW_SIZE; @@ -52,6 +55,8 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder public static final ParseField FAILURES_ALLOWED_FIELD = new ParseField("allow_rerank_failures"); public static final ParseField SNIPPETS_FIELD = new ParseField("snippets"); public static final ParseField NUM_SNIPPETS_FIELD = new ParseField("num_snippets"); + public static final ParseField CHUNK_SIZE_FIELD = new ParseField("chunk_size"); + public static final ParseField CHUNKING_SETTINGS_FIELD = new ParseField("chunking_settings"); public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(TextSimilarityRankBuilder.NAME, args -> { @@ -79,7 +84,11 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder true, args -> { Integer numSnippets = (Integer) args[0]; - return new SnippetConfig(numSnippets); + Integer chunkSize = (Integer) args[1]; + @SuppressWarnings("unchecked") + Map chunkingSettingsMap = (Map) args[2]; + ChunkingSettings chunkingSettings = chunkingSettingsMap != null ? 
ChunkingSettingsBuilder.fromMap(chunkingSettingsMap) : null; + return new SnippetConfig(numSnippets, chunkingSettings, chunkSize); } ); @@ -97,6 +106,8 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder PARSER.declareObject(optionalConstructorArg(), SNIPPETS_PARSER, SNIPPETS_FIELD); if (RERANK_SNIPPETS.isEnabled()) { SNIPPETS_PARSER.declareInt(optionalConstructorArg(), NUM_SNIPPETS_FIELD); + SNIPPETS_PARSER.declareInt(optionalConstructorArg(), CHUNK_SIZE_FIELD); + SNIPPETS_PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> p.map(), null, CHUNKING_SETTINGS_FIELD); } RetrieverBuilder.declareBaseParserFields(PARSER); @@ -215,9 +226,7 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu rankWindowSize, minScore, failuresAllowed, - snippets != null - ? new SnippetConfig(snippets.numSnippets, inferenceText, TextSimilarityRankBuilder.tokenSizeLimit(inferenceId)) - : null + snippets != null ? new SnippetConfig(snippets.numSnippets, inferenceText, snippets.chunkingSettings()) : null ) ); return sourceBuilder; @@ -251,6 +260,9 @@ protected void doToXContent(XContentBuilder builder, Params params) throws IOExc if (snippets.numSnippets() != null) { builder.field(NUM_SNIPPETS_FIELD.getPreferredName(), snippets.numSnippets()); } + if (snippets.chunkingSettings() != null) { + builder.field(CHUNKING_SETTINGS_FIELD.getPreferredName(), snippets.chunkingSettings().asMap()); + } builder.endObject(); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index 898aaca833874..8331e3522d313 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -19,18 +19,14 @@ import org.elasticsearch.xpack.core.common.snippets.SnippetScorer; import org.elasticsearch.xpack.inference.chunking.Chunker; import org.elasticsearch.xpack.inference.chunking.ChunkerBuilder; -import org.elasticsearch.xpack.inference.chunking.SentenceBoundaryChunkingSettings; import java.io.IOException; import java.util.List; import static org.elasticsearch.xpack.inference.rank.textsimilarity.SnippetConfig.DEFAULT_NUM_SNIPPETS; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankBuilder.DEFAULT_TOKEN_SIZE_LIMIT; public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends RerankingRankFeaturePhaseRankShardContext { - private final ChunkingSettings DEFAULT_CHUNKING_SETTINGS = new SentenceBoundaryChunkingSettings(DEFAULT_TOKEN_SIZE_LIMIT, 0); - private final SnippetConfig snippetRankInput; private final ChunkingSettings chunkingSettings; private final Chunker chunker; @@ -38,9 +34,8 @@ public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends Rer public TextSimilarityRerankingRankFeaturePhaseRankShardContext(String field, @Nullable SnippetConfig snippetRankInput) { super(field); this.snippetRankInput = snippetRankInput; - // TODO allow customization through snippetRankInput - chunkingSettings = DEFAULT_CHUNKING_SETTINGS; - chunker = ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy()); + chunkingSettings = snippetRankInput != null ? snippetRankInput.chunkingSettings() : null; + chunker = chunkingSettings != null ? 
ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy()) : null; } @Override @@ -68,7 +63,6 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI ); bestChunks = scoredSnippets.stream().map(SnippetScorer.ScoredSnippet::content).limit(numSnippets).toList(); } catch (IOException e) { - // TODO - Should this throw, or truncate/send a warning header? throw new IllegalStateException("Could not generate snippets for input to reranker", e); } rankFeatureDocs[i].featureData(bestChunks); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java index 27aa8b6fb5b5a..9a2b563bc9c1c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java @@ -48,7 +48,7 @@ public class TextSimilarityRankFeaturePhaseRankCoordinatorContextTests extends E "some query", 0.0f, false, - new SnippetConfig(2, "some query", 10) + new SnippetConfig(2, "some query", 20) ); public void testComputeScores() { diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 2553ee2c9732b..8d1918624e9a4 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ 
b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -680,3 +680,143 @@ setup: - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } + +--- +"Reranking based on snippets on a semantic_text field specifying chunking settings": + + - requires: + cluster_features: "text_similarity_reranker_snippets" + reason: snippets introduced in 9.2.0 + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "semantic_text_field", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match: + topic: + query: "science" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "how often does the moon hide the sun?" + field: semantic_text_field + snippets: + chunking_settings: + strategy: sentence + max_chunk_size: 20 + sentence_overlap: 0 + size: 10 + + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } + +--- +"Reranking based on snippets on a semantic_text field specifying chunking settings requires valid chunking settings": + + - requires: + cluster_features: "text_similarity_reranker_snippets" + reason: snippets introduced in 9.2.0 + + - do: + catch: /Invalid value/ + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "semantic_text_field", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match: + topic: + query: "science" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "how often does the moon hide the sun?" 
+ field: semantic_text_field + snippets: + chunking_settings: + strategy: sentence + max_chunk_size: 10 + sentence_overlap: 20 + size: 10 + +--- +"Reranking based on snippets on a semantic_text field specifying chunk size": + + - requires: + cluster_features: "text_similarity_reranker_snippets" + reason: snippets introduced in 9.2.0 + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "semantic_text_field", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match: + topic: + query: "science" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "how often does the moon hide the sun?" + field: semantic_text_field + snippets: + chunk_size: 20 + size: 10 + + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } + +--- +"Reranking based on snippets on a semantic_text field cannot specify both chunking settings and chunk size": + + - requires: + cluster_features: "text_similarity_reranker_snippets" + reason: snippets introduced in 9.2.0 + + - do: + catch: /Only one of chunking_settings or chunk_size may be provided/ + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "semantic_text_field", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match: + topic: + query: "science" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "how often does the moon hide the sun?" 
+ field: semantic_text_field + snippets: + chunk_size: 20 + chunking_settings: + strategy: sentence + max_chunk_size: 20 + sentence_overlap: 0 + size: 10 From 0036271f0e5a850d4bf42d1dbde1cc158d2f08b9 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 27 Aug 2025 14:58:36 -0400 Subject: [PATCH 04/24] Cleanup --- .../chunking/RecursiveChunkingSettings.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java index 611736ceb4213..402f7b49c7e01 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java @@ -53,6 +53,31 @@ public RecursiveChunkingSettings(StreamInput in) throws IOException { separators = in.readCollectionAsList(StreamInput::readString); } + @Override + public void validate() { + ValidationException validationException = new ValidationException(); + + if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) { + validationException.addValidationError( + ChunkingSettingsOptions.MAX_CHUNK_SIZE + + "[" + + maxChunkSize + + "] must be between " + + MAX_CHUNK_SIZE_LOWER_LIMIT + + " and " + + MAX_CHUNK_SIZE_UPPER_LIMIT + ); + + if (separators != null && separators.isEmpty()) { + validationException.addValidationError("Recursive chunking settings can not have an empty list of separators"); + } + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + } + } + public static RecursiveChunkingSettings fromMap(Map map) { ValidationException validationException = new ValidationException(); From 2df2f9da3a468a7b30a8158f535b19eb204da60f Mon Sep 17 00:00:00 2001 
From: Kathleen DeRusso Date: Wed, 27 Aug 2025 14:59:57 -0400 Subject: [PATCH 05/24] Update docs/changelog/133576.yaml --- docs/changelog/133576.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133576.yaml diff --git a/docs/changelog/133576.yaml b/docs/changelog/133576.yaml new file mode 100644 index 0000000000000..31b87f9fbebda --- /dev/null +++ b/docs/changelog/133576.yaml @@ -0,0 +1,5 @@ +pr: 133576 +summary: Text similarity reranker chunks and scores snippets +area: Relevance +type: enhancement +issues: [] From 8b7f7f2bf47289de7cdfbef3f15f9f3ba6e34ec9 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 27 Aug 2025 16:13:03 -0400 Subject: [PATCH 06/24] Refactor/Rename SnippetScorer to MemoryIndexChunkScorer --- .../core/src/main/java/module-info.java | 2 +- .../MemoryIndexChunkScorer.java} | 37 ++++++++++--------- .../MemoryIndexChunkScorerTests.java} | 22 +++++------ ...nkingRankFeaturePhaseRankShardContext.java | 8 ++-- 4 files changed, 35 insertions(+), 34 deletions(-) rename x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/{snippets/SnippetScorer.java => chunks/MemoryIndexChunkScorer.java} (75%) rename x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/{snippets/SnippetScorerTests.java => chunks/MemoryIndexChunkScorerTests.java} (67%) diff --git a/x-pack/plugin/core/src/main/java/module-info.java b/x-pack/plugin/core/src/main/java/module-info.java index a520a7a7abfc6..20c6fffa8a544 100644 --- a/x-pack/plugin/core/src/main/java/module-info.java +++ b/x-pack/plugin/core/src/main/java/module-info.java @@ -231,7 +231,7 @@ exports org.elasticsearch.xpack.core.watcher.watch; exports org.elasticsearch.xpack.core.watcher; exports org.elasticsearch.xpack.core.security.authc.apikey; - exports org.elasticsearch.xpack.core.common.snippets; + exports org.elasticsearch.xpack.core.common.chunks; provides org.elasticsearch.action.admin.cluster.node.info.ComponentVersionNumber with diff --git 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java similarity index 75% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java index fa110eda597ea..748a9b8ba65de 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.core.common.snippets; +package org.elasticsearch.xpack.core.common.chunks; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; @@ -30,38 +30,39 @@ import java.util.List; /** - * Utility class for scoring snippets using an in-memory Lucene index. + * Utility class for scoring pre-determined chunks using an in-memory Lucene index. */ -public class SnippetScorer { +public class MemoryIndexChunkScorer { private static final String CONTENT_FIELD = "content"; private final StandardAnalyzer analyzer; - public SnippetScorer() { + public MemoryIndexChunkScorer() { + // TODO: Allow analyzer to be customizable and/or read from the field mapping this.analyzer = new StandardAnalyzer(); } /** - * Creates an in-memory index of snippets, or snippets, returns ordered, scored list. + * Creates an in-memory index of chunks, or chunks, returns ordered, scored list. 
* - * @param snippets the list of text snippets to score + * @param chunks the list of text chunks to score * @param inferenceText the query text to compare against * @param maxResults maximum number of results to return - * @return list of scored snippets ordered by relevance - * @throws IOException on failure scoring snippets + * @return list of scored chunks ordered by relevance + * @throws IOException on failure scoring chunks */ - public List scoreSnippets(List snippets, String inferenceText, int maxResults) throws IOException { - if (snippets == null || snippets.isEmpty() || inferenceText == null || inferenceText.trim().isEmpty()) { + public List scoreChunks(List chunks, String inferenceText, int maxResults) throws IOException { + if (chunks == null || chunks.isEmpty() || inferenceText == null || inferenceText.trim().isEmpty()) { return new ArrayList<>(); } try (Directory directory = new ByteBuffersDirectory()) { IndexWriterConfig config = new IndexWriterConfig(analyzer); try (IndexWriter writer = new IndexWriter(directory, config)) { - for (String snippet : snippets) { + for (String chunk : chunks) { Document doc = new Document(); - doc.add(new TextField(CONTENT_FIELD, snippet, Field.Store.YES)); + doc.add(new TextField(CONTENT_FIELD, chunk, Field.Store.YES)); writer.addDocument(doc); } writer.commit(); @@ -71,17 +72,17 @@ public List scoreSnippets(List snippets, String inference IndexSearcher searcher = new IndexSearcher(reader); Query query = createQuery(inferenceText); - int numResults = Math.min(maxResults, snippets.size()); + int numResults = Math.min(maxResults, chunks.size()); TopDocs topDocs = searcher.search(query, numResults); - List scoredSnippets = new ArrayList<>(); + List scoredChunks = new ArrayList<>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document doc = reader.storedFields().document(scoreDoc.doc); String content = doc.get(CONTENT_FIELD); - scoredSnippets.add(new ScoredSnippet(content, scoreDoc.score)); + scoredChunks.add(new 
ScoredChunk(content, scoreDoc.score)); } - return scoredSnippets; + return scoredChunks; } } } @@ -123,7 +124,7 @@ private String[] tokenizeText(String text) throws IOException { } /** - * Represents a snippet with its relevance score. + * Represents a chunk with its relevance score. */ - public record ScoredSnippet(String content, float score) {} + public record ScoredChunk(String content, float score) {} } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java similarity index 67% rename from x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java index e4fc3ad642f6d..4fd43582b48dd 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/snippets/SnippetScorerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.core.common.snippets; +package org.elasticsearch.xpack.core.common.chunks; import org.elasticsearch.test.ESTestCase; @@ -15,10 +15,10 @@ import static org.hamcrest.Matchers.greaterThan; -public class SnippetScorerTests extends ESTestCase { +public class MemoryIndexChunkScorerTests extends ESTestCase { - public void testScoreSnippets() throws IOException { - SnippetScorer scorer = new SnippetScorer(); + public void testScoreChunks() throws IOException { + MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); List snippets = Arrays.asList( "Cats like to sleep all day and play with mice", @@ -31,26 +31,26 @@ public void testScoreSnippets() throws IOException { String inferenceText = "dogs play walk"; int maxResults = 3; - List scoredSnippets = scorer.scoreSnippets(snippets, inferenceText, maxResults); + List scoredChunks = scorer.scoreChunks(snippets, inferenceText, maxResults); - assertEquals(maxResults, scoredSnippets.size()); + assertEquals(maxResults, scoredChunks.size()); // The snippets about dogs should score highest, followed by the snippet about cats - SnippetScorer.ScoredSnippet snippet = scoredSnippets.getFirst(); + MemoryIndexChunkScorer.ScoredChunk snippet = scoredChunks.getFirst(); assertTrue(snippet.content().equalsIgnoreCase("Dogs love to play with toys and go for walks")); assertThat(snippet.score(), greaterThan(0f)); - snippet = scoredSnippets.get(1); + snippet = scoredChunks.get(1); assertTrue(snippet.content().equalsIgnoreCase("Dogs are loyal companions and great pets")); assertThat(snippet.score(), greaterThan(0f)); - snippet = scoredSnippets.get(2); + snippet = scoredChunks.get(2); assertTrue(snippet.content().equalsIgnoreCase("Cats like to sleep all day and play with mice")); assertThat(snippet.score(), greaterThan(0f)); // Scores should be in descending order - for (int i = 1; i < scoredSnippets.size(); i++) { - assertTrue(scoredSnippets.get(i - 1).score() >= scoredSnippets.get(i).score()); + for (int 
i = 1; i < scoredChunks.size(); i++) { + assertTrue(scoredChunks.get(i - 1).score() >= scoredChunks.get(i).score()); } } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index 8331e3522d313..dfb1914361f97 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -16,7 +16,7 @@ import org.elasticsearch.search.rank.feature.RankFeatureDoc; import org.elasticsearch.search.rank.feature.RankFeatureShardResult; import org.elasticsearch.search.rank.rerank.RerankingRankFeaturePhaseRankShardContext; -import org.elasticsearch.xpack.core.common.snippets.SnippetScorer; +import org.elasticsearch.xpack.core.common.chunks.MemoryIndexChunkScorer; import org.elasticsearch.xpack.inference.chunking.Chunker; import org.elasticsearch.xpack.inference.chunking.ChunkerBuilder; @@ -55,13 +55,13 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI List bestChunks; try { - SnippetScorer scorer = new SnippetScorer(); - List scoredSnippets = scorer.scoreSnippets( + MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); + List scoredChunks = scorer.scoreChunks( chunks, snippetRankInput.inferenceText(), numSnippets ); - bestChunks = scoredSnippets.stream().map(SnippetScorer.ScoredSnippet::content).limit(numSnippets).toList(); + bestChunks = scoredChunks.stream().map(MemoryIndexChunkScorer.ScoredChunk::content).limit(numSnippets).toList(); } catch (IOException e) { throw new IllegalStateException("Could not generate snippets 
for input to reranker", e); } From 80f4434779831f9a21fccb3a3ecded2989699005 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 27 Aug 2025 16:17:51 -0400 Subject: [PATCH 07/24] PR feedback on MemoryIndexChunkScorer --- .../common/chunks/MemoryIndexChunkScorer.java | 28 ++----------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java index 748a9b8ba65de..ac1bc5a2d8033 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java @@ -14,16 +14,14 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.QueryBuilder; import java.io.IOException; import java.util.ArrayList; @@ -71,7 +69,8 @@ public List scoreChunks(List chunks, String inferenceText, try (DirectoryReader reader = DirectoryReader.open(directory)) { IndexSearcher searcher = new IndexSearcher(reader); - Query query = createQuery(inferenceText); + org.apache.lucene.util.QueryBuilder qb = new QueryBuilder(analyzer); + Query query = qb.createBooleanQuery(CONTENT_FIELD, inferenceText, BooleanClause.Occur.SHOULD); int numResults = Math.min(maxResults, chunks.size()); 
TopDocs topDocs = searcher.search(query, numResults); @@ -87,27 +86,6 @@ public List scoreChunks(List chunks, String inferenceText, } } - /** - * Creates a Lucene query from the inference text. - */ - private Query createQuery(String inferenceText) throws IOException { - String[] tokens = tokenizeText(inferenceText); - - if (tokens.length == 0) { - throw new IllegalArgumentException("Inference text must contain at least one valid token"); - } else if (tokens.length == 1) { - return new TermQuery(new Term(CONTENT_FIELD, tokens[0])); - } else { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (String token : tokens) { - if (token != null && token.trim().isEmpty() == false) { - builder.add(new TermQuery(new Term(CONTENT_FIELD, token)), BooleanClause.Occur.SHOULD); - } - } - return builder.build(); - } - } - private String[] tokenizeText(String text) throws IOException { List tokens = new ArrayList<>(); try (org.apache.lucene.analysis.TokenStream tokenStream = analyzer.tokenStream(CONTENT_FIELD, text)) { From 98722584a84f8908855454734feb1f48a70aa955 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 27 Aug 2025 16:54:56 -0400 Subject: [PATCH 08/24] Update API and code to rename snippets to chunks --- .../rest/yaml/CcsCommonYamlTestSuiteIT.java | 2 +- .../yaml/RcsCcsCommonYamlTestSuiteIT.java | 2 +- ...okeTestMultiNodeClientYamlTestSuiteIT.java | 2 +- .../test/rest/ClientYamlTestSuiteIT.java | 2 +- .../org/elasticsearch/TransportVersions.java | 2 +- .../search/rank/feature/RankFeatureDoc.java | 4 +- .../test/cluster/FeatureFlag.java | 2 +- .../xpack/inference/InferenceFeatures.java | 8 +- ...ppetConfig.java => ChunkScorerConfig.java} | 58 ++++++------- .../TextSimilarityRankBuilder.java | 32 +++---- ...ankFeaturePhaseRankCoordinatorContext.java | 12 +-- .../TextSimilarityRankRetrieverBuilder.java | 77 ++++++++--------- ...nkingRankFeaturePhaseRankShardContext.java | 22 ++--- ...aturePhaseRankCoordinatorContextTests.java | 16 ++-- 
.../TextSimilarityTestPlugin.java | 4 +- .../xpack/inference/InferenceRestIT.java | 2 +- .../70_text_similarity_rank_retriever.yml | 86 +++++++++---------- ...CoreWithSecurityClientYamlTestSuiteIT.java | 2 +- 18 files changed, 165 insertions(+), 170 deletions(-) rename x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/{SnippetConfig.java => ChunkScorerConfig.java} (61%) diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java index 80908dfc6ab1e..33d986f1cf56a 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java +++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java @@ -101,7 +101,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_SNIPPETS); + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS); private static ElasticsearchCluster remoteCluster = ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java index e37c553545fdf..f9041aa23d173 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java +++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java @@ -100,7 +100,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) .feature(FeatureFlag.IVF_FORMAT) 
.feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_SNIPPETS) + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .user("test_admin", "x-pack-test-password"); private static ElasticsearchCluster fulfillingCluster = ElasticsearchCluster.local() diff --git a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java index 2be870dbf4ea5..36fe161d02bf2 100644 --- a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java +++ b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java @@ -40,7 +40,7 @@ public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTe .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_SNIPPETS) + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .build(); public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java index 739b6fd755aa8..e5a53a5016c0f 100644 --- a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java +++ b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java @@ -40,7 +40,7 @@ public class ClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_SNIPPETS) + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .build(); public 
ClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 81e4cca69769c..27695dc68aa4c 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -353,7 +353,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_SAMPLE_OPERATOR_STATUS = def(9_127_0_00); public static final TransportVersion ESQL_TOPN_TIMINGS = def(9_128_0_00); public static final TransportVersion NODE_WEIGHTS_ADDED_TO_NODE_BALANCE_STATS = def(9_129_0_00); - public static final TransportVersion RERANK_SNIPPETS = def(9_130_0_00); + public static final TransportVersion RERANK_RESCORE_CHUNKS = def(9_130_0_00); public static final TransportVersion PIPELINE_TRACKING_INFO = def(9_131_0_00); public static final TransportVersion COMPONENT_TEMPLATE_TRACKING_INFO = def(9_132_0_00); public static final TransportVersion TO_CHILD_BLOCK_JOIN_QUERY = def(9_133_0_00); diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java index afbb32fd829f7..7981d2edff71b 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java +++ b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java @@ -36,7 +36,7 @@ public RankFeatureDoc(int doc, float score, int shardIndex) { public RankFeatureDoc(StreamInput in) throws IOException { super(in); - if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) { + if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { featureData = in.readOptionalStringCollectionAsList(); } else { String featureDataString = in.readOptionalString(); @@ -55,7 +55,7 @@ public void featureData(List featureData) { @Override 
protected void doWriteTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) { + if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { out.writeOptionalStringCollection(featureData); } else { out.writeOptionalString(featureData.get(0)); diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index 888c4afbf3326..29afaab962b72 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -24,7 +24,7 @@ public enum FeatureFlag { LOGS_STREAM("es.logs_stream_feature_flag_enabled=true", Version.fromString("9.1.0"), null), PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null), SYNTHETIC_VECTORS("es.mapping_synthetic_vectors=true", Version.fromString("9.2.0"), null), - RERANK_SNIPPETS("es.text_similarity_reranker_snippets=true", Version.fromString("9.2.0"), null); + RERANK_RESCORE_CHUNKS("es.text_similarity_reranker_rescore_chunks=true", Version.fromString("9.2.0"), null); public final String systemProperty; public final Version from; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index fd160ae10fa6f..3142420026f62 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -25,8 +25,8 @@ import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static 
org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor.SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_SNIPPETS; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_SNIPPETS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_RESCORE_CHUNKS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS; /** * Provides inference features. @@ -85,8 +85,8 @@ public Set getTestFeatures() { SEMANTIC_TEXT_FIELDS_CHUNKS_FORMAT ) ); - if (RERANK_SNIPPETS.isEnabled()) { - testFeatures.add(TEXT_SIMILARITY_RERANKER_SNIPPETS); + if (RERANK_RESCORE_CHUNKS.isEnabled()) { + testFeatures.add(TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS); } return testFeatures; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java similarity index 61% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java index aa9f953ca31df..e5dc57883ed2a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/SnippetConfig.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java @@ -18,14 +18,14 @@ import 
java.util.Map; import java.util.Objects; -public class SnippetConfig implements Writeable { +public class ChunkScorerConfig implements Writeable { - public final Integer numSnippets; + public final Integer numChunks; private final String inferenceText; private final ChunkingSettings chunkingSettings; public static final int DEFAULT_CHUNK_SIZE = 300; - public static final int DEFAULT_NUM_SNIPPETS = 1; + public static final int DEFAULT_NUM_CHUNKS = 1; public static ChunkingSettings createChunkingSettings(Integer chunkSize) { int chunkSizeOrDefault = chunkSize != null ? chunkSize : DEFAULT_CHUNK_SIZE; @@ -34,45 +34,45 @@ public static ChunkingSettings createChunkingSettings(Integer chunkSize) { return chunkingSettings; } - public SnippetConfig(StreamInput in) throws IOException { - this.numSnippets = in.readOptionalVInt(); - this.inferenceText = in.readString(); - Map chunkingSettingsMap = in.readGenericMap(); - this.chunkingSettings = ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); - } + public static ChunkingSettings chunkingSettingsFromMap(Map map) { - public SnippetConfig(Integer numSnippets, ChunkingSettings chunkingSettings, Integer chunkSize) { - this(numSnippets, null, chunkingSettings, chunkSize); - } + if (map == null || map.isEmpty()) { + return createChunkingSettings(DEFAULT_CHUNK_SIZE); + } - public SnippetConfig(Integer numSnippets, String inferenceText, Integer chunkSize) { - this(numSnippets, inferenceText, null, chunkSize); - } + if (map.size() == 1 && map.containsKey("max_chunk_size")) { + return createChunkingSettings((Integer) map.get("max_chunk_size")); + } - public SnippetConfig(Integer numSnippets, String inferenceText, ChunkingSettings chunkingSettings) { - this(numSnippets, inferenceText, chunkingSettings, null); + return ChunkingSettingsBuilder.fromMap(map); } - public SnippetConfig(Integer numSnippets, String inferenceText, ChunkingSettings chunkingSettings, Integer chunkSize) { + public ChunkScorerConfig(StreamInput in) throws 
IOException { + this.numChunks = in.readOptionalVInt(); + this.inferenceText = in.readString(); + Map chunkingSettingsMap = in.readGenericMap(); + this.chunkingSettings = ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); + } - if (chunkingSettings != null && chunkSize != null) { - throw new IllegalArgumentException("Only one of chunking_settings or chunk_size may be provided"); - } + public ChunkScorerConfig(Integer numChunks, ChunkingSettings chunkingSettings) { + this(numChunks, null, chunkingSettings); + } - this.numSnippets = numSnippets; + public ChunkScorerConfig(Integer numChunks, String inferenceText, ChunkingSettings chunkingSettings) { + this.numChunks = numChunks; this.inferenceText = inferenceText; - this.chunkingSettings = chunkingSettings != null ? chunkingSettings : createChunkingSettings(chunkSize); + this.chunkingSettings = chunkingSettings; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalVInt(numSnippets); + out.writeOptionalVInt(numChunks); out.writeString(inferenceText); out.writeGenericMap(chunkingSettings.asMap()); } - public Integer numSnippets() { - return numSnippets; + public Integer numChunks() { + return numChunks; } public String inferenceText() { @@ -87,14 +87,14 @@ public ChunkingSettings chunkingSettings() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - SnippetConfig that = (SnippetConfig) o; - return Objects.equals(numSnippets, that.numSnippets) + ChunkScorerConfig that = (ChunkScorerConfig) o; + return Objects.equals(numChunks, that.numChunks) && Objects.equals(inferenceText, that.inferenceText) && Objects.equals(chunkingSettings, that.chunkingSettings); } @Override public int hashCode() { - return Objects.hash(numSnippets, inferenceText, chunkingSettings); + return Objects.hash(numChunks, inferenceText, chunkingSettings); } } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java index 52543c8ad3a60..69505e3555bf2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java @@ -30,12 +30,12 @@ import java.util.List; import java.util.Objects; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.CHUNK_RESCORER_FIELD; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.FAILURES_ALLOWED_FIELD; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.FIELD_FIELD; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.INFERENCE_ID_FIELD; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.INFERENCE_TEXT_FIELD; import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.MIN_SCORE_FIELD; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.SNIPPETS_FIELD; /** * A {@code RankBuilder} that enables ranking with text similarity model inference. Supports parameters for configuring the inference call. 
@@ -55,7 +55,7 @@ public class TextSimilarityRankBuilder extends RankBuilder { private final String field; private final Float minScore; private final boolean failuresAllowed; - private final SnippetConfig snippetConfig; + private final ChunkScorerConfig chunkScorerConfig; public TextSimilarityRankBuilder( String field, @@ -64,7 +64,7 @@ public TextSimilarityRankBuilder( int rankWindowSize, Float minScore, boolean failuresAllowed, - SnippetConfig snippetConfig + ChunkScorerConfig chunkScorerConfig ) { super(rankWindowSize); this.inferenceId = inferenceId; @@ -72,7 +72,7 @@ public TextSimilarityRankBuilder( this.field = field; this.minScore = minScore; this.failuresAllowed = failuresAllowed; - this.snippetConfig = snippetConfig; + this.chunkScorerConfig = chunkScorerConfig; } public TextSimilarityRankBuilder(StreamInput in) throws IOException { @@ -88,10 +88,10 @@ public TextSimilarityRankBuilder(StreamInput in) throws IOException { } else { this.failuresAllowed = false; } - if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) { - this.snippetConfig = in.readOptionalWriteable(SnippetConfig::new); + if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + this.chunkScorerConfig = in.readOptionalWriteable(ChunkScorerConfig::new); } else { - this.snippetConfig = null; + this.chunkScorerConfig = null; } } @@ -116,8 +116,8 @@ public void doWriteTo(StreamOutput out) throws IOException { || out.getTransportVersion().onOrAfter(TransportVersions.RERANKER_FAILURES_ALLOWED)) { out.writeBoolean(failuresAllowed); } - if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) { - out.writeOptionalWriteable(snippetConfig); + if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + out.writeOptionalWriteable(chunkScorerConfig); } } @@ -134,8 +134,8 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio if (failuresAllowed) { 
builder.field(FAILURES_ALLOWED_FIELD.getPreferredName(), true); } - if (snippetConfig != null) { - builder.field(SNIPPETS_FIELD.getPreferredName(), snippetConfig); + if (chunkScorerConfig != null) { + builder.field(CHUNK_RESCORER_FIELD.getPreferredName(), chunkScorerConfig); } } @@ -183,7 +183,7 @@ public QueryPhaseRankCoordinatorContext buildQueryPhaseCoordinatorContext(int si @Override public RankFeaturePhaseRankShardContext buildRankFeaturePhaseShardContext() { - return new TextSimilarityRerankingRankFeaturePhaseRankShardContext(field, snippetConfig); + return new TextSimilarityRerankingRankFeaturePhaseRankShardContext(field, chunkScorerConfig); } @Override @@ -197,7 +197,9 @@ public RankFeaturePhaseRankCoordinatorContext buildRankFeaturePhaseCoordinatorCo inferenceText, minScore, failuresAllowed, - snippetConfig != null ? new SnippetConfig(snippetConfig.numSnippets, inferenceText, snippetConfig.chunkingSettings()) : null + chunkScorerConfig != null + ? new ChunkScorerConfig(chunkScorerConfig.numChunks, inferenceText, chunkScorerConfig.chunkingSettings()) + : null ); } @@ -229,12 +231,12 @@ protected boolean doEquals(RankBuilder other) { && Objects.equals(field, that.field) && Objects.equals(minScore, that.minScore) && failuresAllowed == that.failuresAllowed - && Objects.equals(snippetConfig, that.snippetConfig); + && Objects.equals(chunkScorerConfig, that.chunkScorerConfig); } @Override protected int doHashCode() { - return Objects.hash(inferenceId, inferenceText, field, minScore, failuresAllowed, snippetConfig); + return Objects.hash(inferenceId, inferenceText, field, minScore, failuresAllowed, chunkScorerConfig); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java index 
0a47db4d2a519..725443bc01e0d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java @@ -40,7 +40,7 @@ public class TextSimilarityRankFeaturePhaseRankCoordinatorContext extends RankFe protected final String inferenceId; protected final String inferenceText; protected final Float minScore; - protected final SnippetConfig snippetConfig; + protected final ChunkScorerConfig chunkScorerConfig; public TextSimilarityRankFeaturePhaseRankCoordinatorContext( int size, @@ -51,14 +51,14 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext( String inferenceText, Float minScore, boolean failuresAllowed, - @Nullable SnippetConfig snippetConfig + @Nullable ChunkScorerConfig chunkScorerConfig ) { super(size, from, rankWindowSize, failuresAllowed); this.client = client; this.inferenceId = inferenceId; this.inferenceText = inferenceText; this.minScore = minScore; - this.snippetConfig = snippetConfig; + this.chunkScorerConfig = chunkScorerConfig; } @Override @@ -80,8 +80,8 @@ protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener rankedDocs return scores; } - float[] extractScoresFromRankedSnippets(List rankedDocs, RankFeatureDoc[] featureDocs) { + float[] extractScoresFromRankedChunks(List rankedDocs, RankFeatureDoc[] featureDocs) { float[] scores = new float[featureDocs.length]; boolean[] hasScore = new boolean[featureDocs.length]; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 630560131efb8..ca26e34a0cd4f 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -23,7 +23,6 @@ import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; -import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder; import java.io.IOException; import java.util.ArrayList; @@ -45,17 +44,16 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder "text_similarity_reranker_alias_handling_fix" ); public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix"); - public static final NodeFeature TEXT_SIMILARITY_RERANKER_SNIPPETS = new NodeFeature("text_similarity_reranker_snippets"); - public static final FeatureFlag RERANK_SNIPPETS = new FeatureFlag("text_similarity_reranker_snippets"); + public static final NodeFeature TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS = new NodeFeature("text_similarity_reranker_rescore_chunks"); + public static final FeatureFlag RERANK_RESCORE_CHUNKS = new FeatureFlag("text_similarity_reranker_rescore_chunks"); public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id"); public static final ParseField INFERENCE_TEXT_FIELD = new ParseField("inference_text"); public static final ParseField FIELD_FIELD = new ParseField("field"); public static final ParseField FAILURES_ALLOWED_FIELD = new ParseField("allow_rerank_failures"); - public static final ParseField SNIPPETS_FIELD = new ParseField("snippets"); - public static final ParseField NUM_SNIPPETS_FIELD = new ParseField("num_snippets"); - public static final ParseField CHUNK_SIZE_FIELD = new ParseField("chunk_size"); + 
public static final ParseField CHUNK_RESCORER_FIELD = new ParseField("chunk_rescorer"); + public static final ParseField NUM_CHUNKS_FIELD = new ParseField("num_chunks"); public static final ParseField CHUNKING_SETTINGS_FIELD = new ParseField("chunking_settings"); public static final ConstructingObjectParser PARSER = @@ -66,7 +64,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder String field = (String) args[3]; int rankWindowSize = args[4] == null ? DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; boolean failuresAllowed = args[5] != null && (Boolean) args[5]; - SnippetConfig snippets = (SnippetConfig) args[6]; + ChunkScorerConfig chunkScorerConfig = (ChunkScorerConfig) args[6]; return new TextSimilarityRankRetrieverBuilder( retrieverBuilder, @@ -75,22 +73,18 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder field, rankWindowSize, failuresAllowed, - snippets + chunkScorerConfig ); }); - private static final ConstructingObjectParser SNIPPETS_PARSER = new ConstructingObjectParser<>( - SNIPPETS_FIELD.getPreferredName(), - true, - args -> { - Integer numSnippets = (Integer) args[0]; - Integer chunkSize = (Integer) args[1]; + private static final ConstructingObjectParser CHUNK_SCORER_PARSER = + new ConstructingObjectParser<>(CHUNK_RESCORER_FIELD.getPreferredName(), true, args -> { + Integer numChunks = (Integer) args[0]; @SuppressWarnings("unchecked") - Map chunkingSettingsMap = (Map) args[2]; - ChunkingSettings chunkingSettings = chunkingSettingsMap != null ? 
ChunkingSettingsBuilder.fromMap(chunkingSettingsMap) : null; - return new SnippetConfig(numSnippets, chunkingSettings, chunkSize); - } - ); + Map chunkingSettingsMap = (Map) args[1]; + ChunkingSettings chunkingSettings = ChunkScorerConfig.chunkingSettingsFromMap(chunkingSettingsMap); + return new ChunkScorerConfig(numChunks, chunkingSettings); + }); static { PARSER.declareNamedObject(constructorArg(), (p, c, n) -> { @@ -103,11 +97,10 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder PARSER.declareString(constructorArg(), FIELD_FIELD); PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); PARSER.declareBoolean(optionalConstructorArg(), FAILURES_ALLOWED_FIELD); - PARSER.declareObject(optionalConstructorArg(), SNIPPETS_PARSER, SNIPPETS_FIELD); - if (RERANK_SNIPPETS.isEnabled()) { - SNIPPETS_PARSER.declareInt(optionalConstructorArg(), NUM_SNIPPETS_FIELD); - SNIPPETS_PARSER.declareInt(optionalConstructorArg(), CHUNK_SIZE_FIELD); - SNIPPETS_PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> p.map(), null, CHUNKING_SETTINGS_FIELD); + PARSER.declareObject(optionalConstructorArg(), CHUNK_SCORER_PARSER, CHUNK_RESCORER_FIELD); + if (RERANK_RESCORE_CHUNKS.isEnabled()) { + CHUNK_SCORER_PARSER.declareInt(optionalConstructorArg(), NUM_CHUNKS_FIELD); + CHUNK_SCORER_PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> p.map(), null, CHUNKING_SETTINGS_FIELD); } RetrieverBuilder.declareBaseParserFields(PARSER); @@ -128,7 +121,7 @@ public static TextSimilarityRankRetrieverBuilder fromXContent( private final String inferenceText; private final String field; private final boolean failuresAllowed; - private final SnippetConfig snippets; + private final ChunkScorerConfig chunkScorerConfig; public TextSimilarityRankRetrieverBuilder( RetrieverBuilder retrieverBuilder, @@ -137,14 +130,14 @@ public TextSimilarityRankRetrieverBuilder( String field, int rankWindowSize, boolean failuresAllowed, - SnippetConfig snippets + 
ChunkScorerConfig chunkScorerConfig ) { super(List.of(RetrieverSource.from(retrieverBuilder)), rankWindowSize); this.inferenceId = inferenceId; this.inferenceText = inferenceText; this.field = field; this.failuresAllowed = failuresAllowed; - this.snippets = snippets; + this.chunkScorerConfig = chunkScorerConfig; } public TextSimilarityRankRetrieverBuilder( @@ -157,14 +150,14 @@ public TextSimilarityRankRetrieverBuilder( boolean failuresAllowed, String retrieverName, List preFilterQueryBuilders, - SnippetConfig snippets + ChunkScorerConfig chunkScorerConfig ) { super(retrieverSource, rankWindowSize); if (retrieverSource.size() != 1) { throw new IllegalArgumentException("[" + getName() + "] retriever should have exactly one inner retriever"); } - if (snippets != null && snippets.numSnippets() != null && snippets.numSnippets() < 1) { - throw new IllegalArgumentException("num_snippets must be greater than 0, was: " + snippets.numSnippets()); + if (chunkScorerConfig != null && chunkScorerConfig.numChunks() != null && chunkScorerConfig.numChunks() < 1) { + throw new IllegalArgumentException("num_chunks must be greater than 0, was: " + chunkScorerConfig.numChunks()); } this.inferenceId = inferenceId; this.inferenceText = inferenceText; @@ -173,7 +166,7 @@ public TextSimilarityRankRetrieverBuilder( this.failuresAllowed = failuresAllowed; this.retrieverName = retrieverName; this.preFilterQueryBuilders = preFilterQueryBuilders; - this.snippets = snippets; + this.chunkScorerConfig = chunkScorerConfig; } @Override @@ -191,7 +184,7 @@ protected TextSimilarityRankRetrieverBuilder clone( failuresAllowed, retrieverName, newPreFilterQueryBuilders, - snippets + chunkScorerConfig ); } @@ -226,7 +219,9 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu rankWindowSize, minScore, failuresAllowed, - snippets != null ? new SnippetConfig(snippets.numSnippets, inferenceText, snippets.chunkingSettings()) : null + chunkScorerConfig != null + ? 
new ChunkScorerConfig(chunkScorerConfig.numChunks, inferenceText, chunkScorerConfig.chunkingSettings()) + : null ) ); return sourceBuilder; @@ -255,13 +250,13 @@ protected void doToXContent(XContentBuilder builder, Params params) throws IOExc if (failuresAllowed) { builder.field(FAILURES_ALLOWED_FIELD.getPreferredName(), failuresAllowed); } - if (snippets != null) { - builder.startObject(SNIPPETS_FIELD.getPreferredName()); - if (snippets.numSnippets() != null) { - builder.field(NUM_SNIPPETS_FIELD.getPreferredName(), snippets.numSnippets()); + if (chunkScorerConfig != null) { + builder.startObject(CHUNK_RESCORER_FIELD.getPreferredName()); + if (chunkScorerConfig.numChunks() != null) { + builder.field(NUM_CHUNKS_FIELD.getPreferredName(), chunkScorerConfig.numChunks()); } - if (snippets.chunkingSettings() != null) { - builder.field(CHUNKING_SETTINGS_FIELD.getPreferredName(), snippets.chunkingSettings().asMap()); + if (chunkScorerConfig.chunkingSettings() != null) { + builder.field(CHUNKING_SETTINGS_FIELD.getPreferredName(), chunkScorerConfig.chunkingSettings().asMap()); } builder.endObject(); } @@ -277,11 +272,11 @@ public boolean doEquals(Object other) { && rankWindowSize == that.rankWindowSize && Objects.equals(minScore, that.minScore) && failuresAllowed == that.failuresAllowed - && Objects.equals(snippets, that.snippets); + && Objects.equals(chunkScorerConfig, that.chunkScorerConfig); } @Override public int doHashCode() { - return Objects.hash(inferenceId, inferenceText, field, rankWindowSize, minScore, failuresAllowed, snippets); + return Objects.hash(inferenceId, inferenceText, field, rankWindowSize, minScore, failuresAllowed, chunkScorerConfig); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index dfb1914361f97..ff1900f2eaca1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -23,18 +23,18 @@ import java.io.IOException; import java.util.List; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.SnippetConfig.DEFAULT_NUM_SNIPPETS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.ChunkScorerConfig.DEFAULT_NUM_CHUNKS; public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends RerankingRankFeaturePhaseRankShardContext { - private final SnippetConfig snippetRankInput; + private final ChunkScorerConfig chunkScorerConfig; private final ChunkingSettings chunkingSettings; private final Chunker chunker; - public TextSimilarityRerankingRankFeaturePhaseRankShardContext(String field, @Nullable SnippetConfig snippetRankInput) { + public TextSimilarityRerankingRankFeaturePhaseRankShardContext(String field, @Nullable ChunkScorerConfig chunkScorerConfig) { super(field); - this.snippetRankInput = snippetRankInput; - chunkingSettings = snippetRankInput != null ? snippetRankInput.chunkingSettings() : null; + this.chunkScorerConfig = chunkScorerConfig; + chunkingSettings = chunkScorerConfig != null ? chunkScorerConfig.chunkingSettings() : null; chunker = chunkingSettings != null ? 
ChunkerBuilder.fromChunkingStrategy(chunkingSettings.getChunkingStrategy()) : null; } @@ -46,8 +46,8 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI SearchHit hit = hits.getHits()[i]; DocumentField docField = hit.field(field); if (docField != null) { - if (snippetRankInput != null) { - int numSnippets = snippetRankInput.numSnippets() != null ? snippetRankInput.numSnippets() : DEFAULT_NUM_SNIPPETS; + if (chunkScorerConfig != null) { + int numChunks = chunkScorerConfig.numChunks() != null ? chunkScorerConfig.numChunks() : DEFAULT_NUM_CHUNKS; List chunkOffsets = chunker.chunk(docField.getValue().toString(), chunkingSettings); List chunks = chunkOffsets.stream() .map(offset -> { return docField.getValue().toString().substring(offset.start(), offset.end()); }) @@ -58,12 +58,12 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); List scoredChunks = scorer.scoreChunks( chunks, - snippetRankInput.inferenceText(), - numSnippets + chunkScorerConfig.inferenceText(), + numChunks ); - bestChunks = scoredChunks.stream().map(MemoryIndexChunkScorer.ScoredChunk::content).limit(numSnippets).toList(); + bestChunks = scoredChunks.stream().map(MemoryIndexChunkScorer.ScoredChunk::content).limit(numChunks).toList(); } catch (IOException e) { - throw new IllegalStateException("Could not generate snippets for input to reranker", e); + throw new IllegalStateException("Could not generate chunks for input to reranker", e); } rankFeatureDocs[i].featureData(bestChunks); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java index 9a2b563bc9c1c..bf2beaa131e67 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java @@ -39,7 +39,7 @@ public class TextSimilarityRankFeaturePhaseRankCoordinatorContextTests extends E null ); - TextSimilarityRankFeaturePhaseRankCoordinatorContext withSnippets = new TextSimilarityRankFeaturePhaseRankCoordinatorContext( + TextSimilarityRankFeaturePhaseRankCoordinatorContext withChunks = new TextSimilarityRankFeaturePhaseRankCoordinatorContext( 10, 0, 100, @@ -48,7 +48,7 @@ public class TextSimilarityRankFeaturePhaseRankCoordinatorContextTests extends E "some query", 0.0f, false, - new SnippetConfig(2, "some query", 20) + new ChunkScorerConfig(2, "some query", null) ); public void testComputeScores() { @@ -87,7 +87,7 @@ public void testExtractScoresFromRankedDocs() { assertArrayEquals(new float[] { 1.0f, 3.0f, 2.0f }, scores, 0.0f); } - public void testExtractScoresFromSingleSnippets() { + public void testExtractScoresFromSingleChunk() { List rankedDocs = List.of( new RankedDocsResults.RankedDoc(0, 1.0f, "text 1"), @@ -99,12 +99,12 @@ public void testExtractScoresFromSingleSnippets() { createRankFeatureDoc(1, 3.0f, 1, List.of("text 2")), createRankFeatureDoc(2, 2.0f, 0, List.of("text 3")) }; - float[] scores = withSnippets.extractScoresFromRankedSnippets(rankedDocs, featureDocs); - // Returned cores are from the snippet, not the whole text + float[] scores = withChunks.extractScoresFromRankedChunks(rankedDocs, featureDocs); + // Returned scores are from the chunk, not the whole text assertArrayEquals(new float[] { 1.0f, 2.5f, 1.5f }, scores, 0.0f); } - public void testExtractScoresFromMultipleSnippets() { + public void testExtractScoresFromMultipleChunks() { List rankedDocs = List.of( new RankedDocsResults.RankedDoc(0, 1.0f, "this is text
1"), @@ -119,8 +119,8 @@ public void testExtractScoresFromMultipleSnippets() { createRankFeatureDoc(1, 3.0f, 1, List.of("yet more text", "this is text 2")), createRankFeatureDoc(2, 2.0f, 0, List.of("this is text 3", "oh look, more text")) }; - float[] scores = withSnippets.extractScoresFromRankedSnippets(rankedDocs, featureDocs); - // Returned scores are from the best-ranking snippet, not the whole text + float[] scores = withChunks.extractScoresFromRankedChunks(rankedDocs, featureDocs); + // Returned scores are from the best-ranking chunk, not the whole text assertArrayEquals(new float[] { 2.5f, 3.0f, 2.0f }, scores, 0.0f); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java index c88ce1b65ee3d..cad05d791c9c6 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java @@ -177,9 +177,9 @@ public ThrowingMockRequestActionBasedRankBuilder( Float minScore, boolean failuresAllowed, String throwingType, - SnippetConfig snippetConfig + ChunkScorerConfig chunkScorerConfig ) { - super(field, inferenceId, inferenceText, rankWindowSize, minScore, failuresAllowed, snippetConfig); + super(field, inferenceId, inferenceText, rankWindowSize, minScore, failuresAllowed, chunkScorerConfig); this.throwingRankBuilderType = AbstractRerankerIT.ThrowingRankBuilderType.valueOf(throwingType); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java b/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java index d15016ee9f410..1857989bd1024 100644 --- 
a/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java +++ b/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java @@ -34,7 +34,7 @@ public class InferenceRestIT extends ESClientYamlSuiteTestCase { .setting("xpack.security.enabled", "false") .setting("xpack.security.http.ssl.enabled", "false") .setting("xpack.license.self_generated.type", "trial") - .feature(FeatureFlag.RERANK_SNIPPETS) + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .plugin("inference-service-test") .distribution(DistributionType.DEFAULT) .build(); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 8d1918624e9a4..8b73cefaa87f9 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -515,14 +515,14 @@ setup: --- -"Text similarity reranker specifying number of snippets must be > 0": +"Text similarity reranker specifying number of rescore_chunks must be > 0": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: - catch: /num_snippets must be greater than 0/ + catch: /num_chunks must be greater than 0/ search: index: test-index body: @@ -538,18 +538,18 @@ setup: inference_id: my-rerank-model inference_text: "How often does the moon hide the sun?" 
field: inference_text_field - snippets: - num_snippets: 0 + chunk_rescorer: + num_chunks: 0 size: 10 - match: { status: 400 } --- -"Reranking based on snippets": +"Reranking based on rescore_chunks": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -569,8 +569,8 @@ setup: inference_id: my-rerank-model inference_text: "How often does the moon hide the sun?" field: text - snippets: - num_snippets: 2 + chunk_rescorer: + num_chunks: 2 size: 10 - match: { hits.total.value: 2 } @@ -580,11 +580,11 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets using defaults": +"Reranking based on rescore_chunks using defaults": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -603,7 +603,7 @@ setup: inference_id: my-rerank-model inference_text: "How often does the moon hide the sun?" field: text - snippets: { } + chunk_rescorer: { } size: 10 - match: { hits.total.value: 2 } @@ -613,11 +613,11 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets on a semantic_text field": +"Reranking based on rescore_chunks on a semantic_text field": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -637,8 +637,8 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" 
field: semantic_text_field - snippets: - num_snippets: 2 + chunk_rescorer: + num_chunks: 2 size: 10 - match: { hits.total.value: 2 } @@ -648,11 +648,11 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets on a semantic_text field using defaults": +"Reranking based on rescore_chunks on a semantic_text field using defaults": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -672,7 +672,7 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" field: semantic_text_field - snippets: { } + chunk_rescorer: { } size: 10 - match: { hits.total.value: 2 } @@ -682,11 +682,11 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets on a semantic_text field specifying chunking settings": +"Reranking based on rescore_chunks on a semantic_text field specifying chunking settings": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -706,7 +706,7 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" 
field: semantic_text_field - snippets: + chunk_rescorer: chunking_settings: strategy: sentence max_chunk_size: 20 @@ -720,11 +720,11 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets on a semantic_text field specifying chunking settings requires valid chunking settings": +"Reranking based on rescore_chunks on a semantic_text field specifying chunking settings requires valid chunking settings": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: catch: /Invalid value/ @@ -745,7 +745,8 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" field: semantic_text_field - snippets: + chunk_rescorer: + chunk_size: 20 chunking_settings: strategy: sentence max_chunk_size: 10 @@ -753,11 +754,11 @@ setup: size: 10 --- -"Reranking based on snippets on a semantic_text field specifying chunk size": +"Reranking based on rescore_chunks on a semantic_text field specifying chunk size": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: search: @@ -777,7 +778,7 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" 
field: semantic_text_field - snippets: + chunk_rescorer: chunk_size: 20 size: 10 @@ -788,14 +789,13 @@ setup: - match: { hits.hits.1._id: "doc_2" } --- -"Reranking based on snippets on a semantic_text field cannot specify both chunking settings and chunk size": +"Reranking based on chunk_rescorer specifying only max chunk size will default remaining chunking settings": - requires: - cluster_features: "text_similarity_reranker_snippets" - reason: snippets introduced in 9.2.0 + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 - do: - catch: /Only one of chunking_settings or chunk_size may be provided/ search: index: test-index body: @@ -813,10 +813,8 @@ setup: inference_id: my-rerank-model inference_text: "how often does the moon hide the sun?" field: semantic_text_field - snippets: - chunk_size: 20 + chunk_rescorer: + chunk_rescorer: 20 chunking_settings: - strategy: sentence max_chunk_size: 20 - sentence_overlap: 0 size: 10 diff --git a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java index 88c754b257f5e..d792d4932d80a 100644 --- a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java +++ b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java @@ -54,7 +54,7 @@ public class CoreWithSecurityClientYamlTestSuiteIT extends ESClientYamlSuiteTest .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_SNIPPETS) + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .build(); public CoreWithSecurityClientYamlTestSuiteIT(@Name("yaml") 
ClientYamlTestCandidate testCandidate) { From 8c4ab1e4b98a207c19b1eb48ce90057cf037c3a2 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 28 Aug 2025 09:07:03 -0400 Subject: [PATCH 09/24] Missed some snippet renames --- .../chunks/MemoryIndexChunkScorerTests.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java index 4fd43582b48dd..162f860d17f55 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java @@ -20,7 +20,7 @@ public class MemoryIndexChunkScorerTests extends ESTestCase { public void testScoreChunks() throws IOException { MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); - List snippets = Arrays.asList( + List chunks = Arrays.asList( "Cats like to sleep all day and play with mice", "Dogs are loyal companions and great pets", "The weather today is very sunny and warm", @@ -31,22 +31,22 @@ public void testScoreChunks() throws IOException { String inferenceText = "dogs play walk"; int maxResults = 3; - List scoredChunks = scorer.scoreChunks(snippets, inferenceText, maxResults); + List scoredChunks = scorer.scoreChunks(chunks, inferenceText, maxResults); assertEquals(maxResults, scoredChunks.size()); - // The snippets about dogs should score highest, followed by the snippet about cats - MemoryIndexChunkScorer.ScoredChunk snippet = scoredChunks.getFirst(); - assertTrue(snippet.content().equalsIgnoreCase("Dogs love to play with toys and go for walks")); - assertThat(snippet.score(), greaterThan(0f)); + // The chunks about dogs should score highest, followed by the chunk about cats + 
MemoryIndexChunkScorer.ScoredChunk chunk = scoredChunks.getFirst(); + assertTrue(chunk.content().equalsIgnoreCase("Dogs love to play with toys and go for walks")); + assertThat(chunk.score(), greaterThan(0f)); - snippet = scoredChunks.get(1); - assertTrue(snippet.content().equalsIgnoreCase("Dogs are loyal companions and great pets")); - assertThat(snippet.score(), greaterThan(0f)); + chunk = scoredChunks.get(1); + assertTrue(chunk.content().equalsIgnoreCase("Dogs are loyal companions and great pets")); + assertThat(chunk.score(), greaterThan(0f)); - snippet = scoredChunks.get(2); - assertTrue(snippet.content().equalsIgnoreCase("Cats like to sleep all day and play with mice")); - assertThat(snippet.score(), greaterThan(0f)); + chunk = scoredChunks.get(2); + assertTrue(chunk.content().equalsIgnoreCase("Cats like to sleep all day and play with mice")); + assertThat(chunk.score(), greaterThan(0f)); // Scores should be in descending order for (int i = 1; i < scoredChunks.size(); i++) { From fc706a8903c4acb99bec23394015199cc050bea2 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Tue, 2 Sep 2025 15:06:50 -0400 Subject: [PATCH 10/24] Handle case where no matches were found to score chunks --- .../common/chunks/MemoryIndexChunkScorer.java | 7 +++- .../70_text_similarity_rank_retriever.yml | 41 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java index ac1bc5a2d8033..5e64246991a17 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java @@ -81,7 +81,12 @@ public List scoreChunks(List chunks, String inferenceText, scoredChunks.add(new ScoredChunk(content, 
scoreDoc.score)); } - return scoredChunks; + // It's possible that no chunks were scorable (for example, a semantic match that does not have a lexical match). + // In this case, we'll return the first N chunks with a score of 0. + // TODO: consider parameterizing this + return scoredChunks.isEmpty() == false + ? scoredChunks + : chunks.subList(0, Math.min(maxResults, chunks.size())).stream().map(c -> new ScoredChunk(c, 0.0f)).toList(); } } } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 8b73cefaa87f9..ba616a893ab58 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -818,3 +818,44 @@ setup: chunking_settings: max_chunk_size: 20 size: 10 + + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } + + +--- +"Reranking based on chunk_rescorer will send in first chunk if no text matches found": + + - requires: + cluster_features: "text_similarity_reranker_rescore_chunks" + reason: rescore_chunks introduced in 9.2.0 + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "semantic_text_field", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match: + topic: + query: "science" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "iamanonsensefieldthatshouldreturnnoresults" + field: semantic_text_field + chunk_rescorer: { } + size: 10 + + - match: { hits.total.value: 2 } + - length: { hits.hits: 2 } + + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } 
From 246dfa21f253fc2cb81b7a8b9ed5717b7e6c33c0 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 8 Sep 2025 10:14:21 -0400 Subject: [PATCH 11/24] PR feedback on MemoryIndexChunkScorer, add tests --- .../common/chunks/MemoryIndexChunkScorer.java | 15 ----- .../chunks/MemoryIndexChunkScorerTests.java | 57 ++++++++++++++++--- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java index 5e64246991a17..5b6a895e1e090 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java @@ -91,21 +91,6 @@ public List scoreChunks(List chunks, String inferenceText, } } - private String[] tokenizeText(String text) throws IOException { - List tokens = new ArrayList<>(); - try (org.apache.lucene.analysis.TokenStream tokenStream = analyzer.tokenStream(CONTENT_FIELD, text)) { - org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAttribute = tokenStream.addAttribute( - org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class - ); - tokenStream.reset(); - while (tokenStream.incrementToken()) { - tokens.add(termAttribute.toString()); - } - tokenStream.end(); - } - return tokens.toArray(new String[0]); - } - /** * Represents a chunk with its relevance score. 
*/ diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java index 162f860d17f55..30b82eabf9742 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java @@ -13,25 +13,26 @@ import java.util.Arrays; import java.util.List; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; public class MemoryIndexChunkScorerTests extends ESTestCase { + private static final List CHUNKS = Arrays.asList( + "Cats like to sleep all day and play with mice", + "Dogs are loyal companions and great pets", + "The weather today is very sunny and warm", + "Dogs love to play with toys and go for walks", + "Elasticsearch is a great search engine" + ); + public void testScoreChunks() throws IOException { MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); - List chunks = Arrays.asList( - "Cats like to sleep all day and play with mice", - "Dogs are loyal companions and great pets", - "The weather today is very sunny and warm", - "Dogs love to play with toys and go for walks", - "Elasticsearch is a great search engine" - ); - String inferenceText = "dogs play walk"; int maxResults = 3; - List scoredChunks = scorer.scoreChunks(chunks, inferenceText, maxResults); + List scoredChunks = scorer.scoreChunks(CHUNKS, inferenceText, maxResults); assertEquals(maxResults, scoredChunks.size()); @@ -53,4 +54,42 @@ public void testScoreChunks() throws IOException { assertTrue(scoredChunks.get(i - 1).score() >= scoredChunks.get(i).score()); } } + + public void testEmptyChunks() throws IOException { + + int maxResults = 3; + + MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer(); + + // Zero results + List 
scoredChunks = scorer.scoreChunks(CHUNKS, "puggles", maxResults); + assertEquals(maxResults, scoredChunks.size()); + + // There were no results so we return the first N chunks in order + MemoryIndexChunkScorer.ScoredChunk chunk = scoredChunks.getFirst(); + assertTrue(chunk.content().equalsIgnoreCase("Cats like to sleep all day and play with mice")); + assertThat(chunk.score(), equalTo(0f)); + + chunk = scoredChunks.get(1); + assertTrue(chunk.content().equalsIgnoreCase("Dogs are loyal companions and great pets")); + assertThat(chunk.score(), equalTo(0f)); + + chunk = scoredChunks.get(2); + assertTrue(chunk.content().equalsIgnoreCase("The weather today is very sunny and warm")); + assertThat(chunk.score(), equalTo(0f)); + + // Null and Empty chunk input + scoredChunks = scorer.scoreChunks(List.of(), "puggles", maxResults); + assertTrue(scoredChunks.isEmpty()); + + scoredChunks = scorer.scoreChunks(CHUNKS, "", maxResults); + assertTrue(scoredChunks.isEmpty()); + + scoredChunks = scorer.scoreChunks(null, "puggles", maxResults); + assertTrue(scoredChunks.isEmpty()); + + scoredChunks = scorer.scoreChunks(CHUNKS, null, maxResults); + assertTrue(scoredChunks.isEmpty()); + } + } From 6355282f9b632aed84795755c508ffea9fd3aed6 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 8 Sep 2025 10:20:36 -0400 Subject: [PATCH 12/24] Rename num_chunks to size --- .../textsimilarity/ChunkScorerConfig.java | 24 +++++++++---------- .../TextSimilarityRankBuilder.java | 2 +- .../TextSimilarityRankRetrieverBuilder.java | 18 +++++++------- ...nkingRankFeaturePhaseRankShardContext.java | 8 +++---- .../70_text_similarity_rank_retriever.yml | 8 +++---- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java index e5dc57883ed2a..92c4eace01442 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/ChunkScorerConfig.java @@ -20,12 +20,12 @@ public class ChunkScorerConfig implements Writeable { - public final Integer numChunks; + public final Integer size; private final String inferenceText; private final ChunkingSettings chunkingSettings; public static final int DEFAULT_CHUNK_SIZE = 300; - public static final int DEFAULT_NUM_CHUNKS = 1; + public static final int DEFAULT_SIZE = 1; public static ChunkingSettings createChunkingSettings(Integer chunkSize) { int chunkSizeOrDefault = chunkSize != null ? chunkSize : DEFAULT_CHUNK_SIZE; @@ -48,31 +48,31 @@ public static ChunkingSettings chunkingSettingsFromMap(Map map) } public ChunkScorerConfig(StreamInput in) throws IOException { - this.numChunks = in.readOptionalVInt(); + this.size = in.readOptionalVInt(); this.inferenceText = in.readString(); Map chunkingSettingsMap = in.readGenericMap(); this.chunkingSettings = ChunkingSettingsBuilder.fromMap(chunkingSettingsMap); } - public ChunkScorerConfig(Integer numChunks, ChunkingSettings chunkingSettings) { - this(numChunks, null, chunkingSettings); + public ChunkScorerConfig(Integer size, ChunkingSettings chunkingSettings) { + this(size, null, chunkingSettings); } - public ChunkScorerConfig(Integer numChunks, String inferenceText, ChunkingSettings chunkingSettings) { - this.numChunks = numChunks; + public ChunkScorerConfig(Integer size, String inferenceText, ChunkingSettings chunkingSettings) { + this.size = size; this.inferenceText = inferenceText; this.chunkingSettings = chunkingSettings; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalVInt(numChunks); + out.writeOptionalVInt(size); out.writeString(inferenceText); out.writeGenericMap(chunkingSettings.asMap()); } - public Integer numChunks() { - return numChunks; + 
public Integer size() { + return size; } public String inferenceText() { @@ -88,13 +88,13 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ChunkScorerConfig that = (ChunkScorerConfig) o; - return Objects.equals(numChunks, that.numChunks) + return Objects.equals(size, that.size) && Objects.equals(inferenceText, that.inferenceText) && Objects.equals(chunkingSettings, that.chunkingSettings); } @Override public int hashCode() { - return Objects.hash(numChunks, inferenceText, chunkingSettings); + return Objects.hash(size, inferenceText, chunkingSettings); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java index 69505e3555bf2..6357588d6fa1c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java @@ -198,7 +198,7 @@ public RankFeaturePhaseRankCoordinatorContext buildRankFeaturePhaseCoordinatorCo minScore, failuresAllowed, chunkScorerConfig != null - ? new ChunkScorerConfig(chunkScorerConfig.numChunks, inferenceText, chunkScorerConfig.chunkingSettings()) + ? 
new ChunkScorerConfig(chunkScorerConfig.size, inferenceText, chunkScorerConfig.chunkingSettings()) : null ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index ca26e34a0cd4f..8571775d70795 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -53,7 +53,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder public static final ParseField FIELD_FIELD = new ParseField("field"); public static final ParseField FAILURES_ALLOWED_FIELD = new ParseField("allow_rerank_failures"); public static final ParseField CHUNK_RESCORER_FIELD = new ParseField("chunk_rescorer"); - public static final ParseField NUM_CHUNKS_FIELD = new ParseField("num_chunks"); + public static final ParseField CHUNK_SIZE_FIELD = new ParseField("size"); public static final ParseField CHUNKING_SETTINGS_FIELD = new ParseField("chunking_settings"); public static final ConstructingObjectParser PARSER = @@ -79,11 +79,11 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder private static final ConstructingObjectParser CHUNK_SCORER_PARSER = new ConstructingObjectParser<>(CHUNK_RESCORER_FIELD.getPreferredName(), true, args -> { - Integer numChunks = (Integer) args[0]; + Integer size = (Integer) args[0]; @SuppressWarnings("unchecked") Map chunkingSettingsMap = (Map) args[1]; ChunkingSettings chunkingSettings = ChunkScorerConfig.chunkingSettingsFromMap(chunkingSettingsMap); - return new ChunkScorerConfig(numChunks, chunkingSettings); + return new ChunkScorerConfig(size, chunkingSettings); }); 
static { @@ -99,7 +99,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder PARSER.declareBoolean(optionalConstructorArg(), FAILURES_ALLOWED_FIELD); PARSER.declareObject(optionalConstructorArg(), CHUNK_SCORER_PARSER, CHUNK_RESCORER_FIELD); if (RERANK_RESCORE_CHUNKS.isEnabled()) { - CHUNK_SCORER_PARSER.declareInt(optionalConstructorArg(), NUM_CHUNKS_FIELD); + CHUNK_SCORER_PARSER.declareInt(optionalConstructorArg(), CHUNK_SIZE_FIELD); CHUNK_SCORER_PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> p.map(), null, CHUNKING_SETTINGS_FIELD); } @@ -156,8 +156,8 @@ public TextSimilarityRankRetrieverBuilder( if (retrieverSource.size() != 1) { throw new IllegalArgumentException("[" + getName() + "] retriever should have exactly one inner retriever"); } - if (chunkScorerConfig != null && chunkScorerConfig.numChunks() != null && chunkScorerConfig.numChunks() < 1) { - throw new IllegalArgumentException("num_chunks must be greater than 0, was: " + chunkScorerConfig.numChunks()); + if (chunkScorerConfig != null && chunkScorerConfig.size() != null && chunkScorerConfig.size() < 1) { + throw new IllegalArgumentException("size must be greater than 0, was: " + chunkScorerConfig.size()); } this.inferenceId = inferenceId; this.inferenceText = inferenceText; @@ -220,7 +220,7 @@ protected SearchSourceBuilder finalizeSourceBuilder(SearchSourceBuilder sourceBu minScore, failuresAllowed, chunkScorerConfig != null - ? new ChunkScorerConfig(chunkScorerConfig.numChunks, inferenceText, chunkScorerConfig.chunkingSettings()) + ? 
new ChunkScorerConfig(chunkScorerConfig.size, inferenceText, chunkScorerConfig.chunkingSettings()) : null ) ); @@ -252,8 +252,8 @@ protected void doToXContent(XContentBuilder builder, Params params) throws IOExc } if (chunkScorerConfig != null) { builder.startObject(CHUNK_RESCORER_FIELD.getPreferredName()); - if (chunkScorerConfig.numChunks() != null) { - builder.field(NUM_CHUNKS_FIELD.getPreferredName(), chunkScorerConfig.numChunks()); + if (chunkScorerConfig.size() != null) { + builder.field(CHUNK_SIZE_FIELD.getPreferredName(), chunkScorerConfig.size()); } if (chunkScorerConfig.chunkingSettings() != null) { builder.field(CHUNKING_SETTINGS_FIELD.getPreferredName(), chunkScorerConfig.chunkingSettings().asMap()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java index ff1900f2eaca1..60adefd3493d2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java @@ -23,7 +23,7 @@ import java.io.IOException; import java.util.List; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.ChunkScorerConfig.DEFAULT_NUM_CHUNKS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.ChunkScorerConfig.DEFAULT_SIZE; public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends RerankingRankFeaturePhaseRankShardContext { @@ -47,7 +47,7 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI DocumentField docField = hit.field(field); if (docField != null) { if (chunkScorerConfig != null) { 
- int numChunks = chunkScorerConfig.numChunks() != null ? chunkScorerConfig.numChunks() : DEFAULT_NUM_CHUNKS; + int size = chunkScorerConfig.size() != null ? chunkScorerConfig.size() : DEFAULT_SIZE; List chunkOffsets = chunker.chunk(docField.getValue().toString(), chunkingSettings); List chunks = chunkOffsets.stream() .map(offset -> { return docField.getValue().toString().substring(offset.start(), offset.end()); }) @@ -59,9 +59,9 @@ public RankShardResult doBuildRankFeatureShardResult(SearchHits hits, int shardI List scoredChunks = scorer.scoreChunks( chunks, chunkScorerConfig.inferenceText(), - numChunks + size ); - bestChunks = scoredChunks.stream().map(MemoryIndexChunkScorer.ScoredChunk::content).limit(numChunks).toList(); + bestChunks = scoredChunks.stream().map(MemoryIndexChunkScorer.ScoredChunk::content).limit(size).toList(); } catch (IOException e) { throw new IllegalStateException("Could not generate chunks for input to reranker", e); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index ba616a893ab58..ab581c33b8dcd 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -522,7 +522,7 @@ setup: reason: rescore_chunks introduced in 9.2.0 - do: - catch: /num_chunks must be greater than 0/ + catch: /size must be greater than 0/ search: index: test-index body: @@ -539,7 +539,7 @@ setup: inference_text: "How often does the moon hide the sun?" field: inference_text_field chunk_rescorer: - num_chunks: 0 + size: 0 size: 10 - match: { status: 400 } @@ -570,7 +570,7 @@ setup: inference_text: "How often does the moon hide the sun?" 
field: text chunk_rescorer: - num_chunks: 2 + size: 2 size: 10 - match: { hits.total.value: 2 } @@ -638,7 +638,7 @@ setup: inference_text: "how often does the moon hide the sun?" field: semantic_text_field chunk_rescorer: - num_chunks: 2 + size: 2 size: 10 - match: { hits.total.value: 2 } From ed13074a592e0f1b5231c09455bec462dec37787 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 8 Sep 2025 14:47:14 +0000 Subject: [PATCH 13/24] [CI] Auto commit changes from spotless --- .../chunking/SentenceBoundaryChunkingSettings.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java index b5da6af5f5985..25b5d248f294b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java @@ -65,11 +65,7 @@ public void validate() { if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT) { validationException.addValidationError( - ChunkingSettingsOptions.MAX_CHUNK_SIZE - + "[" - + maxChunkSize - + "] must be above " - + MAX_CHUNK_SIZE_LOWER_LIMIT + ChunkingSettingsOptions.MAX_CHUNK_SIZE + "[" + maxChunkSize + "] must be above " + MAX_CHUNK_SIZE_LOWER_LIMIT ); } From 6a06b8491199ba4ba4447def6449175df2ff0427 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 8 Sep 2025 10:51:14 -0400 Subject: [PATCH 14/24] Fix error in merge --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 + .../org/elasticsearch/search/rank/feature/RankFeatureDoc.java | 1 + 2 files changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java 
index bbabb17549e46..71418bdd31ec7 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -336,6 +336,7 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_LLAMA_ADDED = def(9_125_0_00); public static final TransportVersion SHARD_WRITE_LOAD_IN_CLUSTER_INFO = def(9_126_0_00); public static final TransportVersion ESQL_SAMPLE_OPERATOR_STATUS = def(9_127_0_00); + public static final TransportVersion RERANK_RESCORE_CHUNKS = def(9_130_0_00); public static final TransportVersion PROJECT_RESERVED_STATE_MOVE_TO_REGISTRY = def(9_147_0_00); public static final TransportVersion STREAMS_ENDPOINT_PARAM_RESTRICTIONS = def(9_148_0_00); public static final TransportVersion RESOLVE_INDEX_MODE_FILTER = def(9_149_0_00); diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java index 26bd4c2206577..30eabb009432b 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java +++ b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java @@ -11,6 +11,7 @@ import org.apache.lucene.search.Explanation; import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.search.rank.RankDoc; From 3c695ac89b2560a3fc7adcc3ffcb16f6ee76418c Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 8 Sep 2025 11:42:02 -0400 Subject: [PATCH 15/24] Fix transport version issues after they were consolidated in main --- .../main/java/org/elasticsearch/TransportVersions.java | 1 - .../search/rank/feature/RankFeatureDoc.java | 4 ++-- .../inference/chunking/RecursiveChunkingSettings.java | 10 ++-------- .../chunking/WordBoundaryChunkingSettings.java | 10 
++-------- .../rank/textsimilarity/TextSimilarityRankBuilder.java | 4 ++-- 5 files changed, 8 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 71418bdd31ec7..bbabb17549e46 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -336,7 +336,6 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_LLAMA_ADDED = def(9_125_0_00); public static final TransportVersion SHARD_WRITE_LOAD_IN_CLUSTER_INFO = def(9_126_0_00); public static final TransportVersion ESQL_SAMPLE_OPERATOR_STATUS = def(9_127_0_00); - public static final TransportVersion RERANK_RESCORE_CHUNKS = def(9_130_0_00); public static final TransportVersion PROJECT_RESERVED_STATE_MOVE_TO_REGISTRY = def(9_147_0_00); public static final TransportVersion STREAMS_ENDPOINT_PARAM_RESTRICTIONS = def(9_148_0_00); public static final TransportVersion RESOLVE_INDEX_MODE_FILTER = def(9_149_0_00); diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java index 30eabb009432b..e7c6865088774 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java +++ b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java @@ -39,7 +39,7 @@ public RankFeatureDoc(int doc, float score, int shardIndex) { public RankFeatureDoc(StreamInput in) throws IOException { super(in); - if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + if (in.getTransportVersion().supports(RERANK_SNIPPETS)) { featureData = in.readOptionalStringCollectionAsList(); } else { String featureDataString = in.readOptionalString(); @@ -58,7 +58,7 @@ public void featureData(List featureData) { @Override protected void 
doWriteTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + if (out.getTransportVersion().supports(RERANK_SNIPPETS)) { out.writeOptionalStringCollection(featureData); } else { out.writeOptionalString(featureData.get(0)); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java index f60e26848bd32..be6bdb6b16b1c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java @@ -56,15 +56,9 @@ public RecursiveChunkingSettings(StreamInput in) throws IOException { public void validate() { ValidationException validationException = new ValidationException(); - if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) { + if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT) { validationException.addValidationError( - ChunkingSettingsOptions.MAX_CHUNK_SIZE - + "[" - + maxChunkSize - + "] must be between " - + MAX_CHUNK_SIZE_LOWER_LIMIT - + " and " - + MAX_CHUNK_SIZE_UPPER_LIMIT + ChunkingSettingsOptions.MAX_CHUNK_SIZE + "[" + maxChunkSize + "] must be above " + MAX_CHUNK_SIZE_LOWER_LIMIT ); if (separators != null && separators.isEmpty()) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java index 1efb550d5833b..055df300bfd3e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java @@ -52,15 +52,9 @@ public WordBoundaryChunkingSettings(StreamInput in) throws IOException { public void validate() { ValidationException validationException = new ValidationException(); - if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) { + if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT) { validationException.addValidationError( - ChunkingSettingsOptions.MAX_CHUNK_SIZE - + "[" - + maxChunkSize - + "] must be between " - + MAX_CHUNK_SIZE_LOWER_LIMIT - + " and " - + MAX_CHUNK_SIZE_UPPER_LIMIT + ChunkingSettingsOptions.MAX_CHUNK_SIZE + "[" + maxChunkSize + "] must be above " + MAX_CHUNK_SIZE_LOWER_LIMIT ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java index a3a97d25e06b4..2a8af99721064 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java @@ -90,7 +90,7 @@ public TextSimilarityRankBuilder(StreamInput in) throws IOException { } else { this.failuresAllowed = false; } - if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + if (in.getTransportVersion().supports(RERANK_SNIPPETS)) { this.chunkScorerConfig = in.readOptionalWriteable(ChunkScorerConfig::new); } else { this.chunkScorerConfig = null; @@ -118,7 +118,7 @@ public void doWriteTo(StreamOutput out) throws IOException { || out.getTransportVersion().onOrAfter(TransportVersions.RERANKER_FAILURES_ALLOWED)) { out.writeBoolean(failuresAllowed); } - if 
(out.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) { + if (out.getTransportVersion().supports(RERANK_SNIPPETS)) { out.writeOptionalWriteable(chunkScorerConfig); } } From 386f3b8a7c8819b9c542a8f071b4b87f57115dfe Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 8 Sep 2025 15:49:21 +0000 Subject: [PATCH 16/24] [CI] Auto commit changes from spotless --- .../org/elasticsearch/search/rank/feature/RankFeatureDoc.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java index e7c6865088774..46c0225d7e3a0 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java +++ b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java @@ -11,7 +11,6 @@ import org.apache.lucene.search.Explanation; import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.search.rank.RankDoc; From e55ccfe87089348d06641de6b6b620a604417535 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 10 Sep 2025 08:41:53 -0400 Subject: [PATCH 17/24] Add feature flag to InferenceUpgradeTestCase --- .../xpack/application/InferenceUpgradeTestCase.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index 7b28c562b13fc..49054f4662e0d 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ 
b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.FeatureFlag; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.upgrades.ParameterizedRollingUpgradeTestCase; @@ -44,6 +45,7 @@ public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { .nodes(NODE_NUM) .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") + .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .build(); @Override From ef024cf141f02b9c891d8ab656e3e543d2a189db Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 09:08:43 -0400 Subject: [PATCH 18/24] Yolo see if this fixes the test --- .../xpack/application/InferenceUpgradeTestCase.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index 49054f4662e0d..78d82fd4e86c8 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -9,6 +9,7 @@ import com.carrotsearch.randomizedtesting.annotations.Name; +import org.elasticsearch.Build; import org.elasticsearch.client.Request; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -34,6 +35,9 
@@ public class InferenceUpgradeTestCase extends ParameterizedRollingUpgradeTestCas public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); + if (clusterHasFeature(FeatureFlag.RERANK_RESCORE_CHUNKS.name()) == false) { + assumeTrue("Skipping Rerank chunks", Build.current().isSnapshot()); + } } // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 0.0.0) the ElasticsearchCluster From c20884552e67ea9ab9b3515b136b584894f7b4fe Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 10:48:15 -0400 Subject: [PATCH 19/24] Real fix for upgrade IT --- .../xpack/application/InferenceUpgradeTestCase.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index 78d82fd4e86c8..e4ceaeba78108 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -35,9 +35,8 @@ public class InferenceUpgradeTestCase extends ParameterizedRollingUpgradeTestCas public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); - if (clusterHasFeature(FeatureFlag.RERANK_RESCORE_CHUNKS.name()) == false) { - assumeTrue("Skipping Rerank chunks", Build.current().isSnapshot()); - } + // TODO Remove when feature flag is removed + assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks") ); } // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 
0.0.0) the ElasticsearchCluster @@ -49,7 +48,6 @@ public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { .nodes(NODE_NUM) .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) .build(); @Override From cc1e913da49db2d9077d5ce6ceb8aa9a9cb15ae3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 11 Sep 2025 14:57:39 +0000 Subject: [PATCH 20/24] [CI] Auto commit changes from spotless --- .../xpack/application/InferenceUpgradeTestCase.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index e4ceaeba78108..7681dd5595c10 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -9,13 +9,11 @@ import com.carrotsearch.randomizedtesting.annotations.Name; -import org.elasticsearch.Build; import org.elasticsearch.client.Request; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.cluster.FeatureFlag; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.http.MockWebServer; import org.elasticsearch.upgrades.ParameterizedRollingUpgradeTestCase; @@ -36,7 +34,7 @@ public class InferenceUpgradeTestCase extends ParameterizedRollingUpgradeTestCas public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { 
super(upgradedNodes); // TODO Remove when feature flag is removed - assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks") ); + assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks")); } // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 0.0.0) the ElasticsearchCluster From d0813b2ad00363e490ce3af3381fce65bec0694a Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 14:11:07 -0400 Subject: [PATCH 21/24] Another ignore --- .../upgrades/AbstractRollingUpgradeWithSecurityTestCase.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java index 961e4a353df95..d40cfad90e048 100644 --- a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java +++ b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java @@ -35,6 +35,10 @@ public abstract class AbstractRollingUpgradeWithSecurityTestCase extends Paramet private static final ElasticsearchCluster cluster = buildCluster(); private static ElasticsearchCluster buildCluster() { + + // TODO Remove when feature flag is removed + assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks")); + // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 
0.0.0) the ElasticsearchCluster // builder uses to lookup a particular distribution var cluster = ElasticsearchCluster.local() From 187106ffec66f46fe3e78d7fdddf97c9e65f826a Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 14:42:45 -0400 Subject: [PATCH 22/24] Revert "Another ignore" This reverts commit d0813b2ad00363e490ce3af3381fce65bec0694a. --- .../upgrades/AbstractRollingUpgradeWithSecurityTestCase.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java index d40cfad90e048..961e4a353df95 100644 --- a/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java +++ b/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/AbstractRollingUpgradeWithSecurityTestCase.java @@ -35,10 +35,6 @@ public abstract class AbstractRollingUpgradeWithSecurityTestCase extends Paramet private static final ElasticsearchCluster cluster = buildCluster(); private static ElasticsearchCluster buildCluster() { - - // TODO Remove when feature flag is removed - assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks")); - // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 0.0.0) the ElasticsearchCluster // builder uses to lookup a particular distribution var cluster = ElasticsearchCluster.local() From af95d5739dcc2f6057df4f895d9cb5b5c54e9ae2 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 15:17:28 -0400 Subject: [PATCH 23/24] let's try reverting the renamed feature flag. 
If this is the cause of the test failures then :table-flip: --- .../rest/yaml/CcsCommonYamlTestSuiteIT.java | 2 +- .../yaml/RcsCcsCommonYamlTestSuiteIT.java | 2 +- ...okeTestMultiNodeClientYamlTestSuiteIT.java | 2 +- .../test/rest/ClientYamlTestSuiteIT.java | 2 +- .../test/cluster/FeatureFlag.java | 2 +- .../application/InferenceUpgradeTestCase.java | 2 +- .../xpack/inference/InferenceFeatures.java | 8 ++++---- .../TextSimilarityRankRetrieverBuilder.java | 6 +++--- .../xpack/inference/InferenceRestIT.java | 2 +- .../70_text_similarity_rank_retriever.yml | 20 +++++++++---------- ...CoreWithSecurityClientYamlTestSuiteIT.java | 2 +- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java index 33d986f1cf56a..80908dfc6ab1e 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java +++ b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java @@ -101,7 +101,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS); + .feature(FeatureFlag.RERANK_SNIPPETS); private static ElasticsearchCluster remoteCluster = ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) diff --git a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java index f9041aa23d173..e37c553545fdf 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java +++ 
b/qa/ccs-common-rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java @@ -100,7 +100,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) + .feature(FeatureFlag.RERANK_SNIPPETS) .user("test_admin", "x-pack-test-password"); private static ElasticsearchCluster fulfillingCluster = ElasticsearchCluster.local() diff --git a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java index 36fe161d02bf2..2be870dbf4ea5 100644 --- a/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java +++ b/qa/smoke-test-multinode/src/yamlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java @@ -40,7 +40,7 @@ public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTe .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) + .feature(FeatureFlag.RERANK_SNIPPETS) .build(); public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java index e5a53a5016c0f..739b6fd755aa8 100644 --- a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java +++ b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java @@ -40,7 +40,7 @@ public class ClientYamlTestSuiteIT 
extends ESClientYamlSuiteTestCase { .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) + .feature(FeatureFlag.RERANK_SNIPPETS) .build(); public ClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index 29afaab962b72..888c4afbf3326 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -24,7 +24,7 @@ public enum FeatureFlag { LOGS_STREAM("es.logs_stream_feature_flag_enabled=true", Version.fromString("9.1.0"), null), PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null), SYNTHETIC_VECTORS("es.mapping_synthetic_vectors=true", Version.fromString("9.2.0"), null), - RERANK_RESCORE_CHUNKS("es.text_similarity_reranker_rescore_chunks=true", Version.fromString("9.2.0"), null); + RERANK_SNIPPETS("es.text_similarity_reranker_snippets=true", Version.fromString("9.2.0"), null); public final String systemProperty; public final Version from; diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index 7681dd5595c10..360bcd2a660d0 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -34,7 +34,7 @@ public class InferenceUpgradeTestCase extends 
ParameterizedRollingUpgradeTestCas public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); // TODO Remove when feature flag is removed - assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_rescore_chunks")); + assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_snippets")); } // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 0.0.0) the ElasticsearchCluster diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 9e3ee737c650a..f6b5a8760aed0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -26,8 +26,8 @@ import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED; import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor.SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_RESCORE_CHUNKS; -import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_SNIPPETS; +import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_SNIPPETS; /** * 
Provides inference features. @@ -88,8 +88,8 @@ public Set getTestFeatures() { SemanticQueryBuilder.SEMANTIC_QUERY_FILTER_FIELD_CAPS_FIX ) ); - if (RERANK_RESCORE_CHUNKS.isEnabled()) { - testFeatures.add(TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS); + if (RERANK_SNIPPETS.isEnabled()) { + testFeatures.add(TEXT_SIMILARITY_RERANKER_SNIPPETS); } return testFeatures; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 8571775d70795..74e8ff2bd4042 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -44,8 +44,8 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder "text_similarity_reranker_alias_handling_fix" ); public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix"); - public static final NodeFeature TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS = new NodeFeature("text_similarity_reranker_rescore_chunks"); - public static final FeatureFlag RERANK_RESCORE_CHUNKS = new FeatureFlag("text_similarity_reranker_rescore_chunks"); + public static final NodeFeature TEXT_SIMILARITY_RERANKER_SNIPPETS = new NodeFeature("text_similarity_reranker_snippets"); + public static final FeatureFlag RERANK_SNIPPETS = new FeatureFlag("text_similarity_reranker_snippets"); public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id"); @@ -98,7 +98,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder 
PARSER.declareInt(optionalConstructorArg(), RANK_WINDOW_SIZE_FIELD); PARSER.declareBoolean(optionalConstructorArg(), FAILURES_ALLOWED_FIELD); PARSER.declareObject(optionalConstructorArg(), CHUNK_SCORER_PARSER, CHUNK_RESCORER_FIELD); - if (RERANK_RESCORE_CHUNKS.isEnabled()) { + if (RERANK_SNIPPETS.isEnabled()) { CHUNK_SCORER_PARSER.declareInt(optionalConstructorArg(), CHUNK_SIZE_FIELD); CHUNK_SCORER_PARSER.declareObjectOrNull(optionalConstructorArg(), (p, c) -> p.map(), null, CHUNKING_SETTINGS_FIELD); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java b/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java index 1857989bd1024..d15016ee9f410 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java +++ b/x-pack/plugin/inference/src/yamlRestTest/java/org/elasticsearch/xpack/inference/InferenceRestIT.java @@ -34,7 +34,7 @@ public class InferenceRestIT extends ESClientYamlSuiteTestCase { .setting("xpack.security.enabled", "false") .setting("xpack.security.http.ssl.enabled", "false") .setting("xpack.license.self_generated.type", "trial") - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) + .feature(FeatureFlag.RERANK_SNIPPETS) .plugin("inference-service-test") .distribution(DistributionType.DEFAULT) .build(); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index ab581c33b8dcd..d971aad2bbc4b 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -518,7 +518,7 @@ setup: "Text similarity reranker specifying number 
of rescore_chunks must be > 0": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -548,7 +548,7 @@ setup: "Reranking based on rescore_chunks": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -583,7 +583,7 @@ setup: "Reranking based on rescore_chunks using defaults": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -616,7 +616,7 @@ setup: "Reranking based on rescore_chunks on a semantic_text field": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -651,7 +651,7 @@ setup: "Reranking based on rescore_chunks on a semantic_text field using defaults": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -685,7 +685,7 @@ setup: "Reranking based on rescore_chunks on a semantic_text field specifying chunking settings": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -723,7 +723,7 @@ setup: "Reranking based on rescore_chunks on a semantic_text field specifying chunking settings requires valid chunking settings": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -757,7 +757,7 @@ setup: "Reranking based on rescore_chunks on a semantic_text field specifying chunk 
size": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -792,7 +792,7 @@ setup: "Reranking based on chunk_rescorer specifying only max chunk size will default remaining chunking settings": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: @@ -830,7 +830,7 @@ setup: "Reranking based on chunk_rescorer will send in first chunk if no text matches found": - requires: - cluster_features: "text_similarity_reranker_rescore_chunks" + cluster_features: "text_similarity_reranker_snippets" reason: rescore_chunks introduced in 9.2.0 - do: diff --git a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java index d792d4932d80a..88c754b257f5e 100644 --- a/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java +++ b/x-pack/qa/core-rest-tests-with-security/src/yamlRestTest/java/org/elasticsearch/xpack/security/CoreWithSecurityClientYamlTestSuiteIT.java @@ -54,7 +54,7 @@ public class CoreWithSecurityClientYamlTestSuiteIT extends ESClientYamlSuiteTest .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT) .feature(FeatureFlag.IVF_FORMAT) .feature(FeatureFlag.SYNTHETIC_VECTORS) - .feature(FeatureFlag.RERANK_RESCORE_CHUNKS) + .feature(FeatureFlag.RERANK_SNIPPETS) .build(); public CoreWithSecurityClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { From d0dd688787d5737eca486a53291c6223dc9b5e1a Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 11 Sep 2025 15:50:10 -0400 Subject: [PATCH 24/24] Remove 
ignored test --- .../xpack/application/InferenceUpgradeTestCase.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java index 360bcd2a660d0..7b28c562b13fc 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/InferenceUpgradeTestCase.java @@ -33,8 +33,6 @@ public class InferenceUpgradeTestCase extends ParameterizedRollingUpgradeTestCas public InferenceUpgradeTestCase(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); - // TODO Remove when feature flag is removed - assumeFalse("Rerank chunks behind feature flag", clusterHasFeature("text_similarity_reranker_snippets")); } // Note we need to use OLD_CLUSTER_VERSION directly here, as it may contain special values (e.g. 0.0.0) the ElasticsearchCluster