Skip to content

Commit 07ed01a

Browse files
committed
Increase values
1 parent 1cb529b commit 07ed01a

File tree

13 files changed

+15
-15
lines changed

13 files changed

+15
-15
lines changed

server/src/main/java/org/elasticsearch/inference/RerankingInferenceService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public interface RerankingInferenceService {
1414
/**
1515
* The default window size for small reranking models (512 input tokens).
1616
*/
17-
int CONSERVATIVE_DEFAULT_WINDOW_SIZE = 250;
17+
int CONSERVATIVE_DEFAULT_WINDOW_SIZE = 300;
1818

1919
/**
2020
* The reranking model's max window or an approximation of

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ public int rerankerWindowSize(String modelId) {
396396
// Alibaba's mGTE models support long context windows of up to 8192 tokens.
397397
// Using 1 token = 0.75 words, this translates to approximately 6144 words.
398398
// https://huggingface.co/Alibaba-NLP/gte-multilingual-reranker-base
399-
return 5000;
399+
return 5500;
400400
}
401401

402402
public static class Configuration {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ public int rerankerWindowSize(String modelId) {
367367
// Cohere rerank model truncates at 4096 tokens https://docs.cohere.com/reference/rerank
368368
// Using 1 token = 0.75 words as a rough estimate, we get 3072 words
369369
// allowing for some headroom, we set the window size below 3072
370-
return 2500;
370+
return 2800;
371371
}
372372

373373
public static class Configuration {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,7 @@ static TaskType inferenceConfigToTaskType(InferenceConfig config) {
10641064
@Override
10651065
public int rerankerWindowSize(String modelId) {
10661066
// The Elastic reranker has a window size of 512 tokens.
1067-
// Return 250 words as a default that comfortably fits in the window.
1067+
// Return 300 words as a default that comfortably fits in the window.
10681068
// TODO custom rerank models may have larger windows, make this configurable
10691069
return RerankingInferenceService.CONSERVATIVE_DEFAULT_WINDOW_SIZE;
10701070
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ public int rerankerWindowSize(String modelId) {
392392
// TODO make the rerank window size configurable
393393

394394
if (modelId != null && modelId.endsWith("-004")) {
395-
return 500;
395+
return 600;
396396
} else {
397397
return RerankingInferenceService.CONSERVATIVE_DEFAULT_WINDOW_SIZE;
398398
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/jinaai/JinaAIService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ public int rerankerWindowSize(String modelId) {
353353
// Jina AI rerank models have an 8000 token input length https://jina.ai/models/jina-reranker-v2-base-multilingual
354354
// Using 1 token = 0.75 words as a rough estimate, we get 6000 words
355355
// allowing for some headroom, we set the window size below 6000 words
356-
return 5000;
356+
return 5500;
357357
}
358358

359359
public static class Configuration {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/voyageai/VoyageAIService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ public class VoyageAIService extends SenderService implements RerankingInference
9494

9595
private static final Map<String, Integer> RERANKERS_INPUT_SIZE = Map.of(
9696
"rerank-lite-1",
97-
2500 // The smallest model has a 4K context length https://docs.voyageai.com/docs/reranker
97+
2800 // The smallest model has a 4K context length https://docs.voyageai.com/docs/reranker
9898
);
9999

100100
/**
101101
* Apart from rerank-lite-1 all other models have a context length of at least 8k.
102102
* This value is based on 1 token == 0.75 words and allowing for some overhead
103103
*/
104-
private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 5000;
104+
private static final int DEFAULT_RERANKER_INPUT_SIZE_WORDS = 5500;
105105

106106
public static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
107107
InputType.INGEST,

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,6 @@ public InferenceService createInferenceService() {
724724

725725
@Override
726726
protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
727-
assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(5000));
727+
assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(5500));
728728
}
729729
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1691,7 +1691,7 @@ public InferenceService createInferenceService() {
16911691

16921692
@Override
16931693
protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
1694-
assertThat(rerankingInferenceService.rerankerWindowSize("Any model"), is(250));
1694+
assertThat(rerankingInferenceService.rerankerWindowSize("Any model"), is(300));
16951695
}
16961696

16971697
private Map<String, Object> getRequestConfigMap(

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,6 @@ public InferenceService createInferenceService() {
16441644

16451645
@Override
16461646
protected void assertRerankerWindowSize(RerankingInferenceService rerankingInferenceService) {
1647-
assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(2500));
1647+
assertThat(rerankingInferenceService.rerankerWindowSize("any model"), is(2800));
16481648
}
16491649
}

0 commit comments

Comments
 (0)