elastic · kderusso · Sep 11, 2025 · Aug 26, 2025 · Aug 26, 2025 · Aug 27, 2025
diff --git a/docs/changelog/133576.yaml b/docs/changelog/133576.yaml
@@ -0,0 +1,5 @@
+pr: 133576
+summary: Text similarity reranker chunks and scores snippets
+area: Relevance
+type: enhancement
+issues: []
diff --git a/...rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java b/...rest/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/CcsCommonYamlTestSuiteIT.java
@@ -101,7 +101,7 @@ public class CcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
         .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED)
         .feature(FeatureFlag.IVF_FORMAT)
         .feature(FeatureFlag.SYNTHETIC_VECTORS)
-        .feature(FeatureFlag.RERANK_SNIPPETS);
+        .feature(FeatureFlag.RERANK_RESCORE_CHUNKS);
 
     private static ElasticsearchCluster remoteCluster = ElasticsearchCluster.local()
         .name(REMOTE_CLUSTER_NAME)

diff --git a/...t/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java b/...t/src/yamlRestTest/java/org/elasticsearch/test/rest/yaml/RcsCcsCommonYamlTestSuiteIT.java
@@ -100,7 +100,7 @@ public class RcsCcsCommonYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
         .feature(FeatureFlag.SUB_OBJECTS_AUTO_ENABLED)
         .feature(FeatureFlag.IVF_FORMAT)
         .feature(FeatureFlag.SYNTHETIC_VECTORS)
-        .feature(FeatureFlag.RERANK_SNIPPETS)
+        .feature(FeatureFlag.RERANK_RESCORE_CHUNKS)
         .user("test_admin", "x-pack-test-password");
 
     private static ElasticsearchCluster fulfillingCluster = ElasticsearchCluster.local()

diff --git a/...amlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java b/...amlRestTest/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java
@@ -40,7 +40,7 @@ public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTe
         .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT)
         .feature(FeatureFlag.IVF_FORMAT)
         .feature(FeatureFlag.SYNTHETIC_VECTORS)
-        .feature(FeatureFlag.RERANK_SNIPPETS)
+        .feature(FeatureFlag.RERANK_RESCORE_CHUNKS)
         .build();
 
     public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {

diff --git a/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java b/rest-api-spec/src/yamlRestTest/java/org/elasticsearch/test/rest/ClientYamlTestSuiteIT.java
@@ -40,7 +40,7 @@ public class ClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
         .feature(FeatureFlag.USE_LUCENE101_POSTINGS_FORMAT)
         .feature(FeatureFlag.IVF_FORMAT)
         .feature(FeatureFlag.SYNTHETIC_VECTORS)
-        .feature(FeatureFlag.RERANK_SNIPPETS)
+        .feature(FeatureFlag.RERANK_RESCORE_CHUNKS)
         .build();
 
     public ClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {

diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -353,7 +353,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ESQL_SAMPLE_OPERATOR_STATUS = def(9_127_0_00);
     public static final TransportVersion ESQL_TOPN_TIMINGS = def(9_128_0_00);
     public static final TransportVersion NODE_WEIGHTS_ADDED_TO_NODE_BALANCE_STATS = def(9_129_0_00);
-    public static final TransportVersion RERANK_SNIPPETS = def(9_130_0_00);
+    public static final TransportVersion RERANK_RESCORE_CHUNKS = def(9_130_0_00);
     public static final TransportVersion PIPELINE_TRACKING_INFO = def(9_131_0_00);
     public static final TransportVersion COMPONENT_TEMPLATE_TRACKING_INFO = def(9_132_0_00);
     public static final TransportVersion TO_CHILD_BLOCK_JOIN_QUERY = def(9_133_0_00);

diff --git a/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java b/server/src/main/java/org/elasticsearch/inference/ChunkingSettings.java
@@ -18,4 +18,6 @@ public interface ChunkingSettings extends ToXContentObject, VersionedNamedWritea
     ChunkingStrategy getChunkingStrategy();
 
     Map<String, Object> asMap();
+
+    /**
+     * Validates these chunking settings.
+     * <p>
+     * The default implementation performs no checks. Implementations that enforce limits on their
+     * values override this method and throw when a setting is out of range.
+     */
+    default void validate() {}
 }
diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureDoc.java
@@ -36,7 +36,7 @@ public RankFeatureDoc(int doc, float score, int shardIndex) {
 
     public RankFeatureDoc(StreamInput in) throws IOException {
         super(in);
-        if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) {
+        if (in.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) {
             featureData = in.readOptionalStringCollectionAsList();
         } else {
             String featureDataString = in.readOptionalString();
@@ -55,7 +55,7 @@ public void featureData(List<String> featureData) {
 
     @Override
     protected void doWriteTo(StreamOutput out) throws IOException {
-        if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_SNIPPETS)) {
+        if (out.getTransportVersion().onOrAfter(TransportVersions.RERANK_RESCORE_CHUNKS)) {
             out.writeOptionalStringCollection(featureData);
         } else {
             out.writeOptionalString(featureData.get(0));

diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java
@@ -24,7 +24,7 @@ public enum FeatureFlag {
     LOGS_STREAM("es.logs_stream_feature_flag_enabled=true", Version.fromString("9.1.0"), null),
     PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null),
     SYNTHETIC_VECTORS("es.mapping_synthetic_vectors=true", Version.fromString("9.2.0"), null),
-    RERANK_SNIPPETS("es.text_similarity_reranker_snippets=true", Version.fromString("9.2.0"), null);
+    RERANK_RESCORE_CHUNKS("es.text_similarity_reranker_rescore_chunks=true", Version.fromString("9.2.0"), null);
 
     public final String systemProperty;
     public final Version from;

diff --git a/x-pack/plugin/core/src/main/java/module-info.java b/x-pack/plugin/core/src/main/java/module-info.java
@@ -231,6 +231,7 @@
     exports org.elasticsearch.xpack.core.watcher.watch;
     exports org.elasticsearch.xpack.core.watcher;
     exports org.elasticsearch.xpack.core.security.authc.apikey;
+    exports org.elasticsearch.xpack.core.common.chunks;
 
     provides org.elasticsearch.action.admin.cluster.node.info.ComponentVersionNumber
         with

diff --git a/...core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java b/...core/src/main/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorer.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.common.chunks;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.QueryBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Utility class for scoring pre-determined chunks using an in-memory Lucene index.
+ * <p>
+ * Each call to {@link #scoreChunks} builds a throwaway index holding one document per chunk and
+ * searches it with a query built from the inference text, where every term is optional.
+ */
+public class MemoryIndexChunkScorer {
+
+    private static final String CONTENT_FIELD = "content";
+
+    private final StandardAnalyzer analyzer;
+
+    public MemoryIndexChunkScorer() {
+        // TODO: Allow analyzer to be customizable and/or read from the field mapping
+        this.analyzer = new StandardAnalyzer();
+    }
+
+    /**
+     * Creates an in-memory index of the given chunks, searches it with the inference text and returns
+     * the matching chunks ordered by descending score.
+     * <p>
+     * Only chunks matched by the query are returned, so the result may contain fewer than
+     * {@code maxResults} entries.
+     *
+     * @param chunks the list of text chunks to score
+     * @param inferenceText the query text to compare against
+     * @param maxResults maximum number of results to return
+     * @return list of scored chunks ordered by relevance; empty when there are no chunks, when the
+     *         inference text is null or blank, or when the inference text yields no query terms
+     * @throws IOException on failure scoring chunks
+     */
+    public List<ScoredChunk> scoreChunks(List<String> chunks, String inferenceText, int maxResults) throws IOException {
+        if (chunks == null || chunks.isEmpty() || inferenceText == null || inferenceText.trim().isEmpty()) {
+            return new ArrayList<>();
+        }
+
+        // Non-blank text can still analyze to zero tokens (e.g. punctuation only); the query builder
+        // then returns null, which must not reach the searcher. Checking first also avoids building
+        // an index that would never be searched.
+        QueryBuilder queryBuilder = new QueryBuilder(analyzer);
+        Query query = queryBuilder.createBooleanQuery(CONTENT_FIELD, inferenceText, BooleanClause.Occur.SHOULD);
+        if (query == null) {
+            return new ArrayList<>();
+        }
+
+        try (Directory directory = new ByteBuffersDirectory()) {
+            IndexWriterConfig config = new IndexWriterConfig(analyzer);
+            try (IndexWriter writer = new IndexWriter(directory, config)) {
+                // One document per chunk; the text is stored so it can be read back from the hits.
+                for (String chunk : chunks) {
+                    Document doc = new Document();
+                    doc.add(new TextField(CONTENT_FIELD, chunk, Field.Store.YES));
+                    writer.addDocument(doc);
+                }
+                writer.commit();
+            }
+
+            try (DirectoryReader reader = DirectoryReader.open(directory)) {
+                IndexSearcher searcher = new IndexSearcher(reader);
+
+                // Never ask for more hits than there are chunks.
+                int numResults = Math.min(maxResults, chunks.size());
+                TopDocs topDocs = searcher.search(query, numResults);
+
+                List<ScoredChunk> scoredChunks = new ArrayList<>(topDocs.scoreDocs.length);
+                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+                    Document doc = reader.storedFields().document(scoreDoc.doc);
+                    String content = doc.get(CONTENT_FIELD);
+                    scoredChunks.add(new ScoredChunk(content, scoreDoc.score));
+                }
+
+                return scoredChunks;
+            }
+        }
+    }
+
+    /**
+     * Represents a chunk with its relevance score.
+     */
+    public record ScoredChunk(String content, float score) {}
+}
diff --git a/...src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java b/...src/test/java/org/elasticsearch/xpack/core/common/chunks/MemoryIndexChunkScorerTests.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.core.common.chunks;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.hamcrest.Matchers.greaterThan;
+
+public class MemoryIndexChunkScorerTests extends ESTestCase {
+
+    /**
+     * Scores five chunks against a three-term query and checks that only the best {@code maxResults}
+     * chunks come back, best match first.
+     */
+    public void testScoreChunks() throws IOException {
+        MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer();
+
+        List<String> chunks = Arrays.asList(
+            "Cats like to sleep all day and play with mice",
+            "Dogs are loyal companions and great pets",
+            "The weather today is very sunny and warm",
+            "Dogs love to play with toys and go for walks",
+            "Elasticsearch is a great search engine"
+        );
+
+        String inferenceText = "dogs play walk";
+        int maxResults = 3;
+
+        List<MemoryIndexChunkScorer.ScoredChunk> scoredChunks = scorer.scoreChunks(chunks, inferenceText, maxResults);
+
+        assertEquals(maxResults, scoredChunks.size());
+
+        // The chunks about dogs should score highest, followed by the chunk about cats
+        MemoryIndexChunkScorer.ScoredChunk chunk = scoredChunks.getFirst();
+        assertEquals("Dogs love to play with toys and go for walks", chunk.content());
+        assertThat(chunk.score(), greaterThan(0f));
+
+        chunk = scoredChunks.get(1);
+        assertEquals("Dogs are loyal companions and great pets", chunk.content());
+        assertThat(chunk.score(), greaterThan(0f));
+
+        chunk = scoredChunks.get(2);
+        assertEquals("Cats like to sleep all day and play with mice", chunk.content());
+        assertThat(chunk.score(), greaterThan(0f));
+
+        // Scores should be in descending order
+        for (int i = 1; i < scoredChunks.size(); i++) {
+            assertTrue(scoredChunks.get(i - 1).score() >= scoredChunks.get(i).score());
+        }
+    }
+
+    /**
+     * A missing or empty chunk list, or a missing or blank inference text, yields no scored chunks.
+     */
+    public void testScoreChunksWithNothingToScore() throws IOException {
+        MemoryIndexChunkScorer scorer = new MemoryIndexChunkScorer();
+        List<String> chunks = List.of("Dogs are loyal companions and great pets");
+
+        assertTrue(scorer.scoreChunks(null, "dogs play walk", 3).isEmpty());
+        assertTrue(scorer.scoreChunks(List.of(), "dogs play walk", 3).isEmpty());
+        assertTrue(scorer.scoreChunks(chunks, null, 3).isEmpty());
+        assertTrue(scorer.scoreChunks(chunks, "   ", 3).isEmpty());
+    }
+}
diff --git a/...k/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/...k/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java
@@ -25,8 +25,8 @@
 import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
 import static org.elasticsearch.xpack.inference.queries.SemanticMatchQueryRewriteInterceptor.SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
 import static org.elasticsearch.xpack.inference.queries.SemanticSparseVectorQueryRewriteInterceptor.SEMANTIC_SPARSE_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
-import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_SNIPPETS;
-import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_SNIPPETS;
+import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.RERANK_RESCORE_CHUNKS;
+import static org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS;
 
 /**
  * Provides inference features.
@@ -85,8 +85,8 @@ public Set<NodeFeature> getTestFeatures() {
                 SEMANTIC_TEXT_FIELDS_CHUNKS_FORMAT
             )
         );
-        if (RERANK_SNIPPETS.isEnabled()) {
-            testFeatures.add(TEXT_SIMILARITY_RERANKER_SNIPPETS);
+        if (RERANK_RESCORE_CHUNKS.isEnabled()) {
+            testFeatures.add(TEXT_SIMILARITY_RERANKER_RESCORE_CHUNKS);
         }
         return testFeatures;
     }

diff --git a/...e/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java b/...e/src/main/java/org/elasticsearch/xpack/inference/chunking/RecursiveChunkingSettings.java
@@ -53,6 +53,31 @@ public RecursiveChunkingSettings(StreamInput in) throws IOException {
         separators = in.readCollectionAsList(StreamInput::readString);
     }
 
+    /**
+     * Checks that the max chunk size is within the allowed range and that the separator list, when
+     * present, is not empty. All violations are collected before failing.
+     *
+     * @throws ValidationException if at least one setting is invalid
+     */
+    @Override
+    public void validate() {
+        ValidationException validationException = new ValidationException();
+
+        if (maxChunkSize < MAX_CHUNK_SIZE_LOWER_LIMIT || maxChunkSize > MAX_CHUNK_SIZE_UPPER_LIMIT) {
+            validationException.addValidationError(
+                ChunkingSettingsOptions.MAX_CHUNK_SIZE
+                    + "["
+                    + maxChunkSize
+                    + "] must be between "
+                    + MAX_CHUNK_SIZE_LOWER_LIMIT
+                    + " and "
+                    + MAX_CHUNK_SIZE_UPPER_LIMIT
+            );
+        }
+
+        // Independent of the chunk size check: an empty separator list is invalid even when the
+        // max chunk size is within range.
+        if (separators != null && separators.isEmpty()) {
+            validationException.addValidationError("Recursive chunking settings can not have an empty list of separators");
+        }
+
+        if (validationException.validationErrors().isEmpty() == false) {
+            throw validationException;
+        }
+    }
+
     public static RecursiveChunkingSettings fromMap(Map<String, Object> map) {
         ValidationException validationException = new ValidationException();
 

diff --git a/...ain/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java b/...ain/java/org/elasticsearch/xpack/inference/chunking/SentenceBoundaryChunkingSettings.java
@@ -55,6 +55,33 @@ public SentenceBoundaryChunkingSettings(StreamInput in) throws IOException {
         }
     }
 
+    /**
+     * Verifies that the max chunk size lies within its permitted bounds and that the sentence overlap
+     * is 0 or 1, collecting every violation before failing.
+     *
+     * @throws ValidationException if at least one setting is invalid
+     */
+    @Override
+    public void validate() {
+        final ValidationException errors = new ValidationException();
+
+        final boolean chunkSizeInRange = MAX_CHUNK_SIZE_LOWER_LIMIT <= maxChunkSize && maxChunkSize <= MAX_CHUNK_SIZE_UPPER_LIMIT;
+        if (chunkSizeInRange == false) {
+            final String chunkSizeError = ChunkingSettingsOptions.MAX_CHUNK_SIZE
+                + "["
+                + maxChunkSize
+                + "] must be between "
+                + MAX_CHUNK_SIZE_LOWER_LIMIT
+                + " and "
+                + MAX_CHUNK_SIZE_UPPER_LIMIT;
+            errors.addValidationError(chunkSizeError);
+        }
+
+        if (sentenceOverlap < 0 || sentenceOverlap > 1) {
+            final String overlapError = ChunkingSettingsOptions.SENTENCE_OVERLAP + "[" + sentenceOverlap + "] must be either 0 or 1";
+            errors.addValidationError(overlapError);
+        }
+
+        // Nothing recorded means the settings are valid.
+        if (errors.validationErrors().isEmpty()) {
+            return;
+        }
+        throw errors;
+    }
+
     @Override
     public Map<String, Object> asMap() {
         return Map.of(

diff --git a/...rc/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java b/...rc/main/java/org/elasticsearch/xpack/inference/chunking/WordBoundaryChunkingSettings.java
@@ -49,6 +49,33 @@ public WordBoundaryChunkingSettings(StreamInput in) throws IOException {
         overlap = in.readInt();
     }
 
+    /**
+     * Verifies that the max chunk size lies within its permitted bounds and that the overlap does not
+     * exceed half of the max chunk size, collecting every violation before failing.
+     *
+     * @throws ValidationException if at least one setting is invalid
+     */
+    @Override
+    public void validate() {
+        final ValidationException errors = new ValidationException();
+
+        final boolean chunkSizeInRange = MAX_CHUNK_SIZE_LOWER_LIMIT <= maxChunkSize && maxChunkSize <= MAX_CHUNK_SIZE_UPPER_LIMIT;
+        if (chunkSizeInRange == false) {
+            final String chunkSizeError = ChunkingSettingsOptions.MAX_CHUNK_SIZE
+                + "["
+                + maxChunkSize
+                + "] must be between "
+                + MAX_CHUNK_SIZE_LOWER_LIMIT
+                + " and "
+                + MAX_CHUNK_SIZE_UPPER_LIMIT;
+            errors.addValidationError(chunkSizeError);
+        }
+
+        // Integer halving of the chunk size is the upper bound for the overlap.
+        if (maxChunkSize / 2 < overlap) {
+            final String overlapError = ChunkingSettingsOptions.OVERLAP
+                + "["
+                + overlap
+                + "] must be less than or equal to half of max chunk size";
+            errors.addValidationError(overlapError);
+        }
+
+        // Nothing recorded means the settings are valid.
+        if (errors.validationErrors().isEmpty()) {
+            return;
+        }
+        throw errors;
+    }
+
     @Override
     public Map<String, Object> asMap() {
         return Map.of(