Skip to content

Commit c30da72

Browse files
committed
Cleanup
1 parent b418f87 commit c30da72

File tree

6 files changed

+34
-32
lines changed

6 files changed

+34
-32
lines changed

server/src/main/java/org/elasticsearch/search/rank/context/RankFeaturePhaseRankCoordinatorContext.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,15 @@ public boolean failuresAllowed() {
5656
return failuresAllowed;
5757
}
5858

59+
/**
60+
* If non-null, we will rerank based on the best-ranking snippet rather than the whole text.
61+
*/
5962
public RerankSnippetInput snippets() {
6063
return snippets;
6164
}
6265

6366
/**
64-
* @return If snippets are requested, this should be overridden with the token size limit of the associated model.
67+
* If snippets are requested, this should be overridden with the token size limit of the associated model.
6568
*/
6669
public Integer tokenSizeLimit() {
6770
return 0;

server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import java.util.Arrays;
2828
import java.util.Collections;
2929

30+
import static org.elasticsearch.search.rank.feature.RerankSnippetInput.DEFAULT_NUM_SNIPPETS;
31+
3032
/**
3133
* The {@code RankFeatureShardPhase} executes the rank feature phase on the shard, iff there is a {@code RankBuilder} that requires it.
3234
* This phase is responsible for reading field data for a set of docids. To do this, it reuses the {@code FetchPhase} to read the required
@@ -56,22 +58,23 @@ public static void prepareForFetch(SearchContext searchContext, RankFeatureShard
5658
searchContext.fetchFieldsContext(
5759
new FetchFieldsContext(Collections.singletonList(new FieldAndFormat(rankFeaturePhaseRankShardContext.getField(), null)))
5860
);
59-
try {
60-
RerankSnippetInput snippets = request.snippets();
61-
if (snippets != null) {
62-
// For POC purposes we're just stripping pre/post tags and deferring if/how we'd want to handle them for this use case.
61+
RerankSnippetInput snippets = request.snippets();
62+
if (snippets != null) {
63+
try {
64+
// Stripping pre/post tags as they're not useful for snippet creation
6365
HighlightBuilder highlightBuilder = new HighlightBuilder().field(field).preTags("").postTags("");
64-
// Force sorting by score to ensure that the first snippet is always the highest score
66+
// Return highest scoring fragments
6567
highlightBuilder.order(HighlightBuilder.Order.SCORE);
66-
if (snippets.numSnippets() != null) {
67-
highlightBuilder.numOfFragments(snippets.numSnippets());
68-
}
68+
int numSnippets = snippets.numSnippets() != null ? snippets.numSnippets() : DEFAULT_NUM_SNIPPETS;
69+
highlightBuilder.numOfFragments(numSnippets);
70+
// Rely on the model to determine the fragment size
71+
// TODO highlighter should be able to set fragment size by token not length
6972
highlightBuilder.fragmentSize(request.getTokenSizeLimit());
7073
SearchHighlightContext searchHighlightContext = highlightBuilder.build(searchContext.getSearchExecutionContext());
7174
searchContext.highlight(searchHighlightContext);
75+
} catch (IOException e) {
76+
throw new RuntimeException("Failed to generate snippet request", e);
7277
}
73-
} catch (IOException e) {
74-
throw new RuntimeException("Failed to create highlight context", e);
7578
}
7679
searchContext.storedFieldsContext(StoredFieldsContext.fromList(Collections.singletonList(StoredFieldsContext._NONE_)));
7780
searchContext.addFetchResult();

server/src/main/java/org/elasticsearch/search/rank/feature/RerankSnippetInput.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,7 @@
1717

1818
public record RerankSnippetInput(Integer numSnippets) implements Writeable {
1919

20-
private static final int DEFAULT_NUM_SNIPPETS = 1;
21-
22-
public RerankSnippetInput {
23-
if (numSnippets == null) {
24-
numSnippets = DEFAULT_NUM_SNIPPETS;
25-
}
26-
}
20+
public static final int DEFAULT_NUM_SNIPPETS = 1;
2721

2822
public RerankSnippetInput(StreamInput in) throws IOException {
2923
this(in.readOptionalVInt());

server/src/main/java/org/elasticsearch/search/rank/rerank/RerankingRankFeaturePhaseRankShardContext.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,13 @@ public RankShardResult buildRankFeatureShardResult(SearchHits hits, int shardId)
5555
DocumentField docField = hit.field(field);
5656
if (docField != null && snippets == null) {
5757
rankFeatureDocs[i].featureData(List.of(docField.getValue().toString()));
58-
}
59-
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
60-
if (highlightFields != null) {
61-
if (highlightFields.containsKey(field)) {
62-
List<String> snippets = Arrays.stream(highlightFields.get(field).fragments()).map(Text::string).toList();
63-
rankFeatureDocs[i].featureData(snippets);
58+
} else {
59+
Map<String, HighlightField> highlightFields = hit.getHighlightFields();
60+
if (highlightFields != null) {
61+
if (highlightFields.containsKey(field)) {
62+
List<String> snippets = Arrays.stream(highlightFields.get(field).fragments()).map(Text::string).toList();
63+
rankFeatureDocs[i].featureData(snippets);
64+
}
6465
}
6566
}
6667
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/random/RandomRankFeaturePhaseRankCoordinatorContext.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,6 @@ public RandomRankFeaturePhaseRankCoordinatorContext(int size, int from, int rank
2626
this.seed = seed;
2727
}
2828

29-
@Override
30-
public Integer tokenSizeLimit() {
31-
return 0;
32-
}
33-
3429
@Override
3530
protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener<float[]> scoreListener) {
3631
// Generate random scores seeded by doc

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,15 @@
3838
*/
3939
public class TextSimilarityRankFeaturePhaseRankCoordinatorContext extends RankFeaturePhaseRankCoordinatorContext {
4040

41+
/**
42+
* The default token size limit of the Elastic reranker.
43+
*/
4144
private static final int RERANK_TOKEN_SIZE_LIMIT = 512;
45+
46+
/**
47+
* A safe default token size limit for other reranker models.
48+
* Reranker models with smaller token limits will be truncated.
49+
*/
4250
private static final int DEFAULT_TOKEN_SIZE_LIMIT = 4096;
4351

4452
protected final Client client;
@@ -67,7 +75,6 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext(
6775
/**
6876
* @return The token size limit to apply to this rerank context.
6977
* The model's actual limit is not yet available, so it is hardcoded for now.
70-
* See: https://github.com/elastic/ml-team/issues/1622
7178
*/
7279
@Override
7380
public Integer tokenSizeLimit() {
@@ -222,8 +229,7 @@ private float[] extractScoresFromRankedSnippets(List<RankedDocsResults.RankedDoc
222229
.limit(rankedDocs.size())
223230
.toArray();
224231

225-
for (int i = 0; i < rankedDocs.size(); i++) {
226-
RankedDocsResults.RankedDoc rankedDoc = rankedDocs.get(i);
232+
for (RankedDocsResults.RankedDoc rankedDoc : rankedDocs) {
227233
int docId = rankedDocToFeatureDoc[rankedDoc.index()];
228234
float score = rankedDoc.relevanceScore();
229235
scores[docId] = hasScore[docId] == false ? score : Math.max(scores[docId], score);

0 commit comments

Comments
 (0)