Merged

Changes from 4 commits
17 changes: 17 additions & 0 deletions specification/inference/_types/CommonTypes.ts
@@ -1306,6 +1306,23 @@ export class ElasticsearchServiceSettings {
* The maximum value is 32.
*/
num_threads: integer
/**
* Only for the `rerank` task type.

Member comment: A quick clarification. For 9.2, these two values are only configurable for rerank endpoints using the elastic reranker model.

* Controls the strategy used for processing long documents during inference.
*
* Possible values:
* - `truncate` (default): Processes only the beginning of each document.
* - `chunk`: Splits long documents into smaller parts (chunks) before inference.

Member comment: I'm not sure where it's best to clarify this, but with chunking enabled we return a single score per document (the same as we do for truncating), with the score corresponding to the highest score of any chunk. I just want to make it clear that the structure of the response to the user will not change, only the rerank relevance scores.

*
* To enable chunking, set this value to `chunk`.
*/
long_document_strategy?: string
/**
* Only for the `rerank` task type.
* Limits the number of chunks per document that are sent for inference when chunking is enabled.
* If not set, all chunks generated for the document are processed.
*/
max_chunks_per_doc?: integer
}

export class ElasticsearchTaskSettings {
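The new rerank-only fields above could be exercised with `service_settings` shaped like the following sketch. The interface mirrors the spec fields in this diff; the `model_id` value and the endpoint usage are illustrative assumptions, not part of the change:

```typescript
// Sketch of the new rerank service_settings fields added in this diff.
// Field names follow the spec; concrete values here are illustrative only.
interface ElasticsearchServiceSettingsSketch {
  model_id: string;
  num_allocations?: number;
  num_threads: number;
  long_document_strategy?: "truncate" | "chunk"; // spec types this as string
  max_chunks_per_doc?: number;                   // only meaningful when chunking
}

const rerankSettings: ElasticsearchServiceSettingsSketch = {
  model_id: ".rerank-v1",          // assumed elastic reranker model id
  num_threads: 1,
  long_document_strategy: "chunk", // enable chunking of long documents
  max_chunks_per_doc: 10,          // cap how many chunks go to inference
};

console.log(JSON.stringify(rerankSettings));
```

Per the reviewer note above, omitting `long_document_strategy` (or setting it to `truncate`) keeps the default behavior of processing only the beginning of each document.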
61 changes: 61 additions & 0 deletions specification/inference/_types/Services.ts
@@ -322,6 +322,67 @@ export class InferenceEndpointInfoWatsonx extends InferenceEndpoint {
task_type: TaskTypeWatsonx
}

/**
* Chunking configuration object
*/
export class ElasticsearchInferenceChunkingSettings {
/**
* The maximum size of a chunk in words.
* This value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).
* This value should not exceed the window size for the associated model.
* @server_default 250
*/
max_chunk_size?: integer
/**
* The number of overlapping words for chunks.
* It is applicable only to a `word` chunking strategy.
* This value cannot be higher than half the `max_chunk_size` value.
* @server_default 100
*/
overlap?: integer
/**
* The number of overlapping sentences for chunks.
* It is applicable only for a `sentence` chunking strategy.
* It can be either `1` or `0`.
* @server_default 1
*/
sentence_overlap?: integer
/**
* Only applicable to the `recursive` strategy and required when using it.
*
* Sets a predefined list of separators in the saved chunking settings based on the selected text type.
* Values can be `markdown` or `plaintext`.
*
* Using this parameter is an alternative to manually specifying a custom `separators` list.
*/
separator_group?: string
/**
* Only applicable to the `recursive` strategy and required when using it.
*
* A list of strings used as possible split points when chunking text.
*
* Each string can be a plain string or a regular expression (regex) pattern.
* The system tries each separator in order to split the text, starting from the first item in the list.
*
* After splitting, it attempts to recombine smaller pieces into larger chunks that stay within
* the `max_chunk_size` limit, to reduce the total number of chunks generated.
*/
separators?: string[]
/**
* The chunking strategy: `sentence`, `word`, `none` or `recursive`.
*
* * If `strategy` is set to `recursive`, you must also specify:
*
* - `max_chunk_size`
* - either `separators` or `separator_group`
*
* Learn more about different chunking strategies in the linked documentation.
* @server_default sentence
* @ext_doc_id chunking-strategies
*/
strategy?: string
}

/**
* Chunking configuration object
*/
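The constraints documented on `ElasticsearchInferenceChunkingSettings` can be summarized as a small validation sketch. The `validateChunking` helper below is hypothetical, not part of the spec; it only restates the rules from the doc comments above:

```typescript
// Hypothetical validator restating the documented chunking constraints.
interface ChunkingSettings {
  strategy?: "sentence" | "word" | "none" | "recursive"; // spec types this as string
  max_chunk_size?: number;
  overlap?: number;
  sentence_overlap?: number;
  separators?: string[];
  separator_group?: "markdown" | "plaintext";
}

function validateChunking(s: ChunkingSettings): string[] {
  const errors: string[] = [];
  const strategy = s.strategy ?? "sentence"; // @server_default sentence
  const maxSize = s.max_chunk_size ?? 250;   // @server_default 250

  // max_chunk_size floor: 20 for sentence, 10 for word.
  const floor = strategy === "word" ? 10 : 20;
  if (maxSize < floor) errors.push(`max_chunk_size must be >= ${floor}`);

  // overlap applies to the word strategy and is capped at half of max_chunk_size.
  if (s.overlap !== undefined && s.overlap > maxSize / 2) {
    errors.push("overlap cannot exceed half of max_chunk_size");
  }

  // sentence_overlap can only be 0 or 1.
  if (s.sentence_overlap !== undefined && ![0, 1].includes(s.sentence_overlap)) {
    errors.push("sentence_overlap must be 0 or 1");
  }

  // recursive requires max_chunk_size plus separators or separator_group.
  if (strategy === "recursive" && !s.separators && !s.separator_group) {
    errors.push("recursive strategy needs separators or separator_group");
  }
  return errors;
}

console.log(validateChunking({ strategy: "word", max_chunk_size: 100, overlap: 60 }));
```

The sketch treats the `@server_default` values as the effective settings when a field is omitted, which is an assumption about server-side behavior.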
@@ -26,7 +26,7 @@ import {
ElasticsearchTaskSettings,
ElasticsearchTaskType
} from '@inference/_types/CommonTypes'
import { InferenceChunkingSettings } from '@inference/_types/Services'
import { ElasticsearchInferenceChunkingSettings } from '@inference/_types/Services'

/**
* Create an Elasticsearch inference endpoint.
@@ -78,10 +78,10 @@ export interface Request extends RequestBase {
}
body: {
/**
* The chunking configuration object.
* The chunking configuration object. For the `rerank` task type, you can enable chunking by setting the `long_document_strategy` parameter to `chunk` in the `service_settings` object.

Member comment: I'm not sure if we need to be more specific about this anywhere, but for this new method of chunking the user cannot set `chunking_settings` the way that they would for embeddings. We handle building the chunking settings for them. If we want to clarify how we build the chunking settings somewhere, we can.

* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
chunking_settings?: ElasticsearchInferenceChunkingSettings
/**
* The type of service supported for the specified task type. In this case, `elasticsearch`.
*/
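For embedding task types, where `chunking_settings` remains user-configurable, a request body using the renamed `ElasticsearchInferenceChunkingSettings` shape might look like the following sketch (the `model_id` and all values are illustrative assumptions; for rerank endpoints, per the reviewer note above, chunking is instead enabled via `service_settings.long_document_strategy`):

```typescript
// Illustrative PUT-inference request body for an embedding endpoint
// using the recursive chunking strategy from the renamed settings class.
const putBody = {
  service: "elasticsearch",
  service_settings: {
    model_id: ".multilingual-e5-small", // assumed built-in model id
    num_threads: 1,
  },
  chunking_settings: {
    strategy: "recursive",
    max_chunk_size: 200,          // required alongside the recursive strategy
    separator_group: "plaintext", // alternative to a custom separators list
  },
};

console.log(JSON.stringify(putBody.chunking_settings));
```

Supplying a custom `separators` array instead of `separator_group` would satisfy the same requirement, since the spec accepts either one with the `recursive` strategy.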