Cleanup

kderusso · kderusso · commit c30da7275647 · 2025-07-01T08:58:50.000-04:00
diff --git a/server/src/main/java/org/elasticsearch/search/rank/context/RankFeaturePhaseRankCoordinatorContext.java b/server/src/main/java/org/elasticsearch/search/rank/context/RankFeaturePhaseRankCoordinatorContext.java
@@ -56,12 +56,15 @@ public boolean failuresAllowed() {
         return failuresAllowed;
     }
 
+    /**
+     * If non-null, we will rerank based on the best-ranking snippet rather than the whole text.
+     */
     public RerankSnippetInput snippets() {
         return snippets;
     }
 
     /**
-     * @return If snippets are requested, this should be overridden with the token size limit of the associated model.
+     * If snippets are requested, this should be overridden with the token size limit of the associated model.
      */
     public Integer tokenSizeLimit() {
         return 0;
diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java
@@ -27,6 +27,8 @@
 import java.util.Arrays;
 import java.util.Collections;
 
+import static org.elasticsearch.search.rank.feature.RerankSnippetInput.DEFAULT_NUM_SNIPPETS;
+
 /**
  * The {@code RankFeatureShardPhase} executes the rank feature phase on the shard, iff there is a {@code RankBuilder} that requires it.
  * This phase is responsible for reading field data for a set of docids. To do this, it reuses the {@code FetchPhase} to read the required
@@ -56,22 +58,23 @@ public static void prepareForFetch(SearchContext searchContext, RankFeatureShard
             searchContext.fetchFieldsContext(
                 new FetchFieldsContext(Collections.singletonList(new FieldAndFormat(rankFeaturePhaseRankShardContext.getField(), null)))
             );
-            try {
-                RerankSnippetInput snippets = request.snippets();
-                if (snippets != null) {
-                    // For POC purposes we're just stripping pre/post tags and deferring if/how we'd want to handle them for this use case.
+            RerankSnippetInput snippets = request.snippets();
+            if (snippets != null) {
+                try {
+                    // Stripping pre/post tags as they're not useful for snippet creation
                     HighlightBuilder highlightBuilder = new HighlightBuilder().field(field).preTags("").postTags("");
-                    // Force sorting by score to ensure that the first snippet is always the highest score
+                    // Return highest scoring fragments
                     highlightBuilder.order(HighlightBuilder.Order.SCORE);
-                    if (snippets.numSnippets() != null) {
-                        highlightBuilder.numOfFragments(snippets.numSnippets());
-                    }
+                    int numSnippets = snippets.numSnippets() != null ? snippets.numSnippets() : DEFAULT_NUM_SNIPPETS;
+                    highlightBuilder.numOfFragments(numSnippets);
+                    // Rely on the model to determine the fragment size
+                    // TODO: the highlighter should be able to size fragments by token count rather than character length
                     highlightBuilder.fragmentSize(request.getTokenSizeLimit());
                     SearchHighlightContext searchHighlightContext = highlightBuilder.build(searchContext.getSearchExecutionContext());
                     searchContext.highlight(searchHighlightContext);
+                } catch (IOException e) {
+                    throw new RuntimeException("Failed to generate snippet request", e);
                 }
-            } catch (IOException e) {
-                throw new RuntimeException("Failed to create highlight context", e);
             }
             searchContext.storedFieldsContext(StoredFieldsContext.fromList(Collections.singletonList(StoredFieldsContext._NONE_)));
             searchContext.addFetchResult();
diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RerankSnippetInput.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RerankSnippetInput.java
@@ -17,13 +17,7 @@
 
 public record RerankSnippetInput(Integer numSnippets) implements Writeable {
 
-    private static final int DEFAULT_NUM_SNIPPETS = 1;
-
-    public RerankSnippetInput {
-        if (numSnippets == null) {
-            numSnippets = DEFAULT_NUM_SNIPPETS;
-        }
-    }
+    public static final int DEFAULT_NUM_SNIPPETS = 1;
 
     public RerankSnippetInput(StreamInput in) throws IOException {
         this(in.readOptionalVInt());
diff --git a/server/src/main/java/org/elasticsearch/search/rank/rerank/RerankingRankFeaturePhaseRankShardContext.java b/server/src/main/java/org/elasticsearch/search/rank/rerank/RerankingRankFeaturePhaseRankShardContext.java
@@ -55,12 +55,13 @@ public RankShardResult buildRankFeatureShardResult(SearchHits hits, int shardId)
                 DocumentField docField = hit.field(field);
                 if (docField != null && snippets == null) {
                     rankFeatureDocs[i].featureData(List.of(docField.getValue().toString()));
-                }
-                Map<String, HighlightField> highlightFields = hit.getHighlightFields();
-                if (highlightFields != null) {
-                    if (highlightFields.containsKey(field)) {
-                        List<String> snippets = Arrays.stream(highlightFields.get(field).fragments()).map(Text::string).toList();
-                        rankFeatureDocs[i].featureData(snippets);
+                } else {
+                    Map<String, HighlightField> highlightFields = hit.getHighlightFields();
+                    if (highlightFields != null) {
+                        if (highlightFields.containsKey(field)) {
+                            List<String> snippets = Arrays.stream(highlightFields.get(field).fragments()).map(Text::string).toList();
+                            rankFeatureDocs[i].featureData(snippets);
+                        }
                     }
                 }
             }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/random/RandomRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/random/RandomRankFeaturePhaseRankCoordinatorContext.java
@@ -26,11 +26,6 @@ public RandomRankFeaturePhaseRankCoordinatorContext(int size, int from, int rank
         this.seed = seed;
     }
 
-    @Override
-    public Integer tokenSizeLimit() {
-        return 0;
-    }
-
     @Override
     protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener<float[]> scoreListener) {
         // Generate random scores seeded by doc
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java
@@ -38,7 +38,15 @@
  */
 public class TextSimilarityRankFeaturePhaseRankCoordinatorContext extends RankFeaturePhaseRankCoordinatorContext {
 
+    /**
+     * The default token size limit of the Elastic reranker.
+     */
     private static final int RERANK_TOKEN_SIZE_LIMIT = 512;
+
+    /**
+     * A safe default token size limit for other reranker models.
+     * Input sent to reranker models with smaller token limits will be truncated.
+     */
     private static final int DEFAULT_TOKEN_SIZE_LIMIT = 4096;
 
     protected final Client client;
@@ -67,7 +75,6 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext(
     /**
      * @return The token size limit to apply to this rerank context.
      * This is not yet available so we are hardcoding it for now.
-     * See: https://github.com/elastic/ml-team/issues/1622
      */
     @Override
     public Integer tokenSizeLimit() {
@@ -222,8 +229,7 @@ private float[] extractScoresFromRankedSnippets(List<RankedDocsResults.RankedDoc
             .limit(rankedDocs.size())
             .toArray();
 
-        for (int i = 0; i < rankedDocs.size(); i++) {
-            RankedDocsResults.RankedDoc rankedDoc = rankedDocs.get(i);
+        for (RankedDocsResults.RankedDoc rankedDoc : rankedDocs) {
             int docId = rankedDocToFeatureDoc[rankedDoc.index()];
             float score = rankedDoc.relevanceScore();
             scores[docId] = hasScore[docId] == false ? score : Math.max(scores[docId], score);