From 2856899dbd76bdba9f4f683d787f449ca0305204 Mon Sep 17 00:00:00 2001
From: kosabogi <105062005+kosabogi@users.noreply.github.com>
Date: Mon, 20 Oct 2025 13:37:29 +0200
Subject: [PATCH] Adds new parameters to the elasticsearch inference API for
 the rerank task type (#5476)

* Adds new parameters to the elasticsearch inference rerank API

* Adds unique inference chunking settings for elasticsearch

* Addresses suggestions

(cherry picked from commit a53ef07df03c1077befa1e78451fb3f0f21a8839)
---
 specification/inference/_types/CommonTypes.ts | 18 ++++++++++++++++++
 specification/inference/_types/Services.ts    |  4 +++-
 .../PutElasticsearchRequest.ts                |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index 25d1be939a..ca3cee712f 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1306,6 +1306,24 @@ export class ElasticsearchServiceSettings {
    * The maximum value is 32.
    */
   num_threads: integer
+  /**
+   * Available only for the `rerank` task type using the Elastic reranker model.
+   * Controls the strategy used for processing long documents during inference.
+   *
+   * Possible values:
+   * - `truncate` (default): Processes only the beginning of each document.
+   * - `chunk`: Splits long documents into smaller parts (chunks) before inference.
+   *
+   * When `long_document_strategy` is set to `chunk`, Elasticsearch splits each document into smaller parts but still returns a single score per document.
+   * That score reflects the highest relevance score among all chunks.
+   */
+  long_document_strategy?: string
+  /**
+   * Only for the `rerank` task type.
+   * Limits the number of chunks per document that are sent for inference when chunking is enabled.
+   * If not set, all chunks generated for the document are processed.
+   */
+  max_chunks_per_doc?: integer
 }
 
 export class ElasticsearchTaskSettings {
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 788604d1d1..b9d5fb0972 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -50,7 +50,9 @@ import {
  */
 export class InferenceEndpoint {
   /**
-   * Chunking configuration object
+   * The chunking configuration object.
+   * Applies only to the `sparse_embedding` and `text_embedding` task types.
+   * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
    */
   chunking_settings?: InferenceChunkingSettings
   /**
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index b57b062f67..b35720d1c7 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -79,6 +79,8 @@ export interface Request extends RequestBase {
   body: {
     /**
      * The chunking configuration object.
+     * Applies only to the `sparse_embedding` and `text_embedding` task types.
+     * Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
      * @ext_doc_id inference-chunking
      */
     chunking_settings?: InferenceChunkingSettings