Skip to content

Commit f88d678

Browse files
committed
Increase highlighter fragment size for snippets
1 parent 9e53b17 commit f88d678

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankBuilder.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ public class TextSimilarityRankBuilder extends RankBuilder {
5252
public static final String NAME = "text_similarity_reranker";
5353

5454
/**
55-
* The default token size limit of the Elastic reranker.
55+
* The default token size limit of the Elastic reranker is 512.
5656
*/
5757
private static final int RERANK_TOKEN_SIZE_LIMIT = 512;
5858

5959
/**
60-
* A safe default token size limit for other reranker models.
60+
* 4096 is a safe default token size limit for other reranker models.
6161
* Input sent to reranker models with smaller token limits will be truncated.
6262
*/
63-
private static final int DEFAULT_TOKEN_SIZE_LIMIT = 4096;
63+
private static final int DEFAULT_TOKEN_SIZE_LIMIT = 4_096;
6464

6565
public static final LicensedFeature.Momentary TEXT_SIMILARITY_RERANKER_FEATURE = LicensedFeature.momentary(
6666
null,

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRerankingRankFeaturePhaseRankShardContext.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ public class TextSimilarityRerankingRankFeaturePhaseRankShardContext extends Rer
3434

3535
private final SnippetRankInput snippetRankInput;
3636

37+
// Rough approximation of the number of characters per token, used to convert a token size limit into a highlight fragment size.
38+
// TODO: the highlighter should be able to set the fragment size by token count, not character length
39+
private static final int TOKEN_SIZE_LIMIT_MULTIPLIER = 5;
40+
3741
public TextSimilarityRerankingRankFeaturePhaseRankShardContext(String field, @Nullable SnippetRankInput snippetRankInput) {
3842
super(field);
3943
this.snippetRankInput = snippetRankInput;
@@ -81,8 +85,9 @@ public void prepareForFetch(SearchContext context) {
8185
: DEFAULT_NUM_SNIPPETS;
8286
highlightBuilder.numOfFragments(numSnippets);
8387
// Derive the fragment size from the model's token size limit
84-
// TODO highlighter should be able to set fragment size by token not length
85-
highlightBuilder.fragmentSize(snippetRankInput.tokenSizeLimit());
88+
int tokenSizeLimit = snippetRankInput.tokenSizeLimit();
89+
int fragmentSize = tokenSizeLimit * TOKEN_SIZE_LIMIT_MULTIPLIER;
90+
highlightBuilder.fragmentSize(fragmentSize);
8691
SearchHighlightContext searchHighlightContext = highlightBuilder.build(context.getSearchExecutionContext());
8792
context.highlight(searchHighlightContext);
8893
} catch (IOException e) {

0 commit comments

Comments
 (0)