From b34f9433d882fd11e9965ac0950ffdeb3551a31c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
 <szabosteve@gmail.com>
Date: Tue, 18 Feb 2025 14:14:56 +0100
Subject: [PATCH] [Inference API] Adds chunking_settings to PUT inference API
 (#3781)

* [Inference API] Adds chunking_settings to PUT inference API.

* [Inference API] Make contrib.

(cherry picked from commit bf2a5ca266971973db9525bd581cb7f3bf27d8ef)
---
 output/openapi/elasticsearch-openapi.json     | 31 +++++++
 .../elasticsearch-serverless-openapi.json     | 31 +++++++
 output/schema/schema-serverless.json          | 89 ++++++++++++++++++-
 output/schema/schema.json                     | 89 ++++++++++++++++++-
 output/typescript/types.ts                    |  8 ++
 specification/inference/_types/Services.ts    | 37 ++++++++
 6 files changed, 277 insertions(+), 8 deletions(-)

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index 993fec31e7..fac648e6ab 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -72993,6 +72993,9 @@
       "inference._types:InferenceEndpoint": {
         "type": "object",
         "properties": {
+          "chunking_settings": {
+            "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+          },
           "service": {
             "description": "The service type",
             "type": "string"
@@ -73009,6 +73012,34 @@
           "service_settings"
         ]
       },
+      "inference._types:InferenceChunkingSettings": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/inference._types:InferenceEndpoint"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "max_chunk_size": {
+                "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)",
+                "type": "number"
+              },
+              "overlap": {
+                "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`",
+                "type": "number"
+              },
+              "sentence_overlap": {
+                "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`",
+                "type": "number"
+              },
+              "strategy": {
+                "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`",
+                "type": "string"
+              }
+            }
+          }
+        ]
+      },
       "inference._types:ServiceSettings": {
         "type": "object"
       },
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 280bdf137e..1b4eec2f6b 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -46221,6 +46221,9 @@
       "inference._types:InferenceEndpoint": {
         "type": "object",
         "properties": {
+          "chunking_settings": {
+            "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+          },
           "service": {
             "description": "The service type",
             "type": "string"
@@ -46237,6 +46240,34 @@
           "service_settings"
         ]
       },
+      "inference._types:InferenceChunkingSettings": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/inference._types:InferenceEndpoint"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "max_chunk_size": {
+                "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)",
+                "type": "number"
+              },
+              "overlap": {
+                "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`",
+                "type": "number"
+              },
+              "sentence_overlap": {
+                "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`",
+                "type": "number"
+              },
+              "strategy": {
+                "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`",
+                "type": "string"
+              }
+            }
+          }
+        ]
+      },
       "inference._types:ServiceSettings": {
         "type": "object"
       },
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index 382bcf29a1..2898b6b5f9 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -96175,7 +96175,7 @@
         "name": "ServiceSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L55-L55",
+      "specLocation": "inference/_types/Services.ts#L92-L92",
       "type": {
         "kind": "user_defined_value"
       }
@@ -96213,7 +96213,7 @@
         "name": "TaskSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L57-L57",
+      "specLocation": "inference/_types/Services.ts#L94-L94",
       "type": {
         "kind": "user_defined_value"
       }
@@ -122841,7 +122841,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L41-L53"
+      "specLocation": "inference/_types/Services.ts#L46-L58"
     },
     {
       "description": "Configuration options when storing the inference endpoint",
@@ -122851,6 +122851,18 @@
         "namespace": "inference._types"
       },
       "properties": [
+        {
+          "description": "Chunking configuration object",
+          "name": "chunking_settings",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "InferenceChunkingSettings",
+              "namespace": "inference._types"
+            }
+          }
+        },
         {
           "description": "The service type",
           "name": "service",
@@ -122888,7 +122900,76 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L23-L39"
+      "specLocation": "inference/_types/Services.ts#L24-L44"
+    },
+    {
+      "description": "Chunking configuration object",
+      "inherits": {
+        "type": {
+          "name": "InferenceEndpoint",
+          "namespace": "inference._types"
+        }
+      },
+      "kind": "interface",
+      "name": {
+        "name": "InferenceChunkingSettings",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)",
+          "name": "max_chunk_size",
+          "required": false,
+          "serverDefault": 250,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`",
+          "name": "overlap",
+          "required": false,
+          "serverDefault": 100,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`",
+          "name": "sentence_overlap",
+          "required": false,
+          "serverDefault": 1,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`",
+          "name": "strategy",
+          "required": false,
+          "serverDefault": "sentence",
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Services.ts#L60-L90"
     },
     {
       "description": "InferenceResult is an aggregation of mutually exclusive variants",
diff --git a/output/schema/schema.json b/output/schema/schema.json
index cabb171688..0077b2243f 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -144135,6 +144135,75 @@
         }
       }
     },
+    {
+      "kind": "interface",
+      "description": "Chunking configuration object",
+      "inherits": {
+        "type": {
+          "name": "InferenceEndpoint",
+          "namespace": "inference._types"
+        }
+      },
+      "name": {
+        "name": "InferenceChunkingSettings",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)",
+          "name": "max_chunk_size",
+          "required": false,
+          "serverDefault": 250,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`",
+          "name": "overlap",
+          "required": false,
+          "serverDefault": 100,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`",
+          "name": "sentence_overlap",
+          "required": false,
+          "serverDefault": 1,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`",
+          "name": "strategy",
+          "required": false,
+          "serverDefault": "sentence",
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Services.ts#L60-L90"
+    },
     {
       "kind": "interface",
       "description": "Configuration options when storing the inference endpoint",
@@ -144143,6 +144212,18 @@
         "namespace": "inference._types"
       },
       "properties": [
+        {
+          "description": "Chunking configuration object",
+          "name": "chunking_settings",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "InferenceChunkingSettings",
+              "namespace": "inference._types"
+            }
+          }
+        },
         {
           "description": "The service type",
           "name": "service",
@@ -144180,7 +144261,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L23-L39"
+      "specLocation": "inference/_types/Services.ts#L24-L44"
     },
     {
       "kind": "interface",
@@ -144221,7 +144302,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L41-L53"
+      "specLocation": "inference/_types/Services.ts#L46-L58"
     },
     {
       "kind": "interface",
@@ -144357,7 +144438,7 @@
         "name": "ServiceSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L55-L55",
+      "specLocation": "inference/_types/Services.ts#L92-L92",
       "type": {
         "kind": "user_defined_value"
       }
@@ -144416,7 +144497,7 @@
         "name": "TaskSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L57-L57",
+      "specLocation": "inference/_types/Services.ts#L94-L94",
       "type": {
         "kind": "user_defined_value"
       }
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 884e92805e..81d272a2a1 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -12869,7 +12869,15 @@ export type InferenceDenseByteVector = byte[]
 
 export type InferenceDenseVector = float[]
 
+export interface InferenceInferenceChunkingSettings {
+  max_chunk_size?: integer
+  overlap?: integer
+  sentence_overlap?: integer
+  strategy?: string
+}
+
 export interface InferenceInferenceEndpoint {
+  chunking_settings?: InferenceInferenceChunkingSettings
   service: string
   service_settings: InferenceServiceSettings
   task_settings?: InferenceTaskSettings
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 52d3c9f7e4..53024633f5 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -18,12 +18,17 @@
  */
 
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
+import { integer } from '@_types/Numeric'
 import { TaskType } from '../_types/TaskType'
 
 /**
  * Configuration options when storing the inference endpoint
  */
 export class InferenceEndpoint {
+  /**
+   * Chunking configuration object
+   */
+  chunking_settings?: InferenceChunkingSettings
   /**
    * The service type
    */
@@ -52,6 +57,38 @@ export class InferenceEndpointInfo extends InferenceEndpoint {
   task_type: TaskType
 }
 
+/**
+ * Chunking configuration object
+ */
+export class InferenceChunkingSettings {
+  /**
+   * Specifies the maximum size of a chunk in words.
+   * This value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).
+   * @server_default 250
+   */
+  max_chunk_size?: integer
+  /**
+   * Specifies the number of overlapping words for chunks.
+   * It applies only to the `word` chunking strategy.
+   * This value cannot be higher than half of `max_chunk_size`.
+   * @server_default 100
+   */
+  overlap?: integer
+  /**
+   * Specifies the number of overlapping sentences for chunks.
+   * It applies only to the `sentence` chunking strategy.
+   * It can be either `1` or `0`.
+   * @server_default 1
+   */
+  sentence_overlap?: integer
+  /**
+   * Specifies the chunking strategy.
+   * It can be either `sentence` or `word`.
+   * @server_default sentence
+   */
+  strategy?: string
+}
+
 export type ServiceSettings = UserDefinedValue
 
 export type TaskSettings = UserDefinedValue