From 2856899dbd76bdba9f4f683d787f449ca0305204 Mon Sep 17 00:00:00 2001
From: kosabogi <105062005+kosabogi@users.noreply.github.com>
Date: Mon, 20 Oct 2025 13:37:29 +0200
Subject: [PATCH] Adds new parameters to the elasticsearch inference API for
 the rerank task type (#5476)

* Adds new parameters to the elasticsearch inference rerank API

* Adds unique inference chunking settings for elasticsearch

* Addresses suggestions

(cherry picked from commit a53ef07df03c1077befa1e78451fb3f0f21a8839)
---
 specification/inference/_types/CommonTypes.ts | 18 ++++++++++++++++++
 specification/inference/_types/Services.ts    |  4 +++-
 .../PutElasticsearchRequest.ts                |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index 25d1be939a..ca3cee712f 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1306,6 +1306,24 @@ export class ElasticsearchServiceSettings {
    * The maximum value is 32.
    */
   num_threads: integer
+  /**
+   * Available only for the `rerank` task type using the Elastic reranker model.
+   * Controls the strategy used for processing long documents during inference.
+   *
+   * Possible values:
+   * - `truncate` (default): Processes only the beginning of each document.
+   * - `chunk`: Splits long documents into smaller parts (chunks) before inference.
+   *
+   * When `long_document_strategy` is set to `chunk`, Elasticsearch splits each document into smaller parts but still returns a single score per document.
+   * That score reflects the highest relevance score among all chunks.
+   */
+  long_document_strategy?: string
+  /**
+   * Only for the `rerank` task type.
+   * Limits the number of chunks per document that are sent for inference when chunking is enabled.
+   * If not set, all chunks generated for the document are processed.
+   */
+  max_chunks_per_doc?: integer
 }
 
 export class ElasticsearchTaskSettings {
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 788604d1d1..b9d5fb0972 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -50,7 +50,9 @@ import {
  */
 export class InferenceEndpoint {
   /**
-   * Chunking configuration object
+   * The chunking configuration object.
+   * Applies only to the `sparse_embedding` and `text_embedding` task types.
+   * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
    */
   chunking_settings?: InferenceChunkingSettings
   /**
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index b57b062f67..b35720d1c7 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -79,6 +79,8 @@ export interface Request extends RequestBase {
   body: {
     /**
      * The chunking configuration object.
+     * Applies only to the `sparse_embedding` and `text_embedding` task types.
+     * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
      * @ext_doc_id inference-chunking
      */
     chunking_settings?: InferenceChunkingSettings