From 074d42c8a3833428781aa1a85ec47c1e4f4df681 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Tue, 15 Jul 2025 10:57:55 +0100 Subject: [PATCH] Add timeout params for PUT inference and POST inference/_stream (#4895) (cherry picked from commit 7fbfa61ea927b7998e5021ba4a4ca721dbc40052) # Conflicts: # specification/inference/put_deepseek/PutDeepSeekRequest.ts --- output/openapi/elasticsearch-openapi.json | 201 ++++++++- .../elasticsearch-serverless-openapi.json | 191 +++++++- output/schema/schema.json | 420 ++++++++++++------ output/typescript/types.ts | 18 + specification/inference/put/PutRequest.ts | 8 + .../PutAlibabaCloudRequest.ts | 8 + .../PutAmazonBedrockRequest.ts | 8 + .../put_anthropic/PutAnthropicRequest.ts | 8 + .../PutAzureAiStudioRequest.ts | 8 + .../put_azureopenai/PutAzureOpenAiRequest.ts | 8 + .../inference/put_cohere/PutCohereRequest.ts | 8 + .../PutElasticsearchRequest.ts | 8 + .../inference/put_elser/PutElserRequest.ts | 8 + .../PutGoogleAiStudioRequest.ts | 8 + .../PutGoogleVertexAiRequest.ts | 8 + .../put_hugging_face/PutHuggingFaceRequest.ts | 8 + .../inference/put_jinaai/PutJinaAiRequest.ts | 8 + .../put_mistral/PutMistralRequest.ts | 8 + .../inference/put_openai/PutOpenAiRequest.ts | 8 + .../put_voyageai/PutVoyageAIRequest.ts | 8 + .../put_watsonx/PutWatsonxRequest.ts | 8 + .../StreamInferenceRequest.ts | 8 + 22 files changed, 812 insertions(+), 162 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index cd59aa6038..3ffda17ace 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -19686,6 +19686,9 @@ "parameters": [ { "$ref": "#/components/parameters/inference.put-inference_id" + }, + { + "$ref": "#/components/parameters/inference.put-timeout" } ], "requestBody": { @@ -19807,6 +19810,9 @@ }, { "$ref": "#/components/parameters/inference.put-inference_id" + }, + { + "$ref": "#/components/parameters/inference.put-timeout" } ], "requestBody": { @@ -19944,6 +19950,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20046,6 +20062,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20138,6 +20164,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20224,6 +20260,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20316,6 +20362,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + 
}, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20408,6 +20464,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20500,6 +20566,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20618,6 +20694,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20714,6 +20800,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20798,6 +20894,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20890,6 +20996,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -20918,11 +21034,6 @@ "summary": "A text embedding task", "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" - }, - "PutHuggingFaceRequestExample2": { - "summary": "A rerank task", - "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", - "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } } } @@ -20979,6 +21090,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": 
"#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -21071,6 +21192,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -21154,6 +21285,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -21246,6 +21387,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -21338,6 +21489,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -21457,16 +21618,6 @@ "summary": "Rerank task", "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" - }, - "RerankRequestExample2": { - "summary": "Rerank task", - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" - }, - "RerankRequestExample3": { - "summary": "Rerank task", - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } } } @@ -21615,6 +21766,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "The amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -118304,6 +118465,16 @@ }, "style": "simple" }, + "inference.put-timeout": { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" + }, "inference.update-inference_id": { "in": "path", "name": "inference_id", diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 5020059d5d..052f51ae73 100644 --- 
a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10603,6 +10603,9 @@ "parameters": [ { "$ref": "#/components/parameters/inference.put-inference_id" + }, + { + "$ref": "#/components/parameters/inference.put-timeout" } ], "requestBody": { @@ -10724,6 +10727,9 @@ }, { "$ref": "#/components/parameters/inference.put-inference_id" + }, + { + "$ref": "#/components/parameters/inference.put-timeout" } ], "requestBody": { @@ -10861,6 +10867,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -10963,6 +10979,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11055,6 +11081,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11141,6 +11177,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11233,6 +11279,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11325,6 +11381,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11417,6 +11483,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11535,6 +11611,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11631,6 +11717,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": 
"Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11715,6 +11811,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11807,6 +11913,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11835,11 +11951,6 @@ "summary": "A text embedding task", "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" - }, - "PutHuggingFaceRequestExample2": { - "summary": "A rerank task", - "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", - "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } } } @@ -11896,6 +12007,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -11988,6 +12109,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -12071,6 +12202,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -12163,6 +12304,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } ], "requestBody": { @@ -12255,6 +12406,16 @@ "$ref": "#/components/schemas/_types.Id" }, "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" } 
], "requestBody": { @@ -12374,16 +12535,6 @@ "summary": "Rerank task", "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" - }, - "RerankRequestExample2": { - "summary": "Rerank task", - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" - }, - "RerankRequestExample3": { - "summary": "Rerank task", - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } } } @@ -70675,6 +70826,16 @@ }, "style": "simple" }, + "inference.put-timeout": { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" + }, "ingest.get_pipeline-id": { "in": "path", "name": "id", diff --git a/output/schema/schema.json b/output/schema/schema.json index 7ab4ab13d1..6bf1c7e635 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -168083,8 +168083,22 @@ } } ], - "query": [], - "specLocation": "inference/put/PutRequest.ts#L25-L78" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put/PutRequest.ts#L26-L86" }, { "kind": "response", @@ -168332,8 +168346,22 @@ } } ], - "query": [], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L30-L77" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L31-L85" }, { "kind": "response", @@ -168517,8 +168545,22 @@ } } ], - "query": [], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L30-L80" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L31-L88" }, { "kind": "response", @@ -168669,8 +168711,22 @@ } } ], - "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L30-L78" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + 
"name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L31-L86" }, { "kind": "response", @@ -168854,8 +168910,22 @@ } } ], - "query": [], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L30-L77" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L31-L85" }, { "kind": "response", @@ -169039,8 +169109,22 @@ } } ], - "query": [], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L30-L85" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L31-L93" }, { "kind": "response", @@ -169224,8 +169308,22 @@ } } ], - "query": [], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L30-L78" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L31-L86" }, { "kind": "response", @@ -169537,8 +169635,22 @@ } } ], - "query": [], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L30-L91" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L31-L99" }, { "kind": "response", @@ -169720,8 +169832,22 @@ } } ], - "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L29-L86" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_elser/PutElserRequest.ts#L30-L94" }, { "kind": "response", @@ -169867,8 +169993,22 @@ } } ], - "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L29-L71" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L30-L79" }, { "kind": "response", @@ -170052,8 +170192,22 @@ } } ], - "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L30-L77" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be 
created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L31-L85" }, { "kind": "response", @@ -170155,38 +170309,6 @@ "method_request": "PUT _inference/text_embedding/hugging-face-embeddings", "summary": "A text embedding task", "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" - }, - "PutHuggingFaceRequestExample2": { - "alternatives": [ - { - "code": "resp = client.inference.put(\n task_type=\"rerank\",\n inference_id=\"hugging-face-rerank\",\n inference_config={\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\",\n \"url\": \"url-endpoint\"\n },\n \"task_settings\": {\n \"return_documents\": True,\n \"top_n\": 3\n }\n },\n)", - "language": "Python" - }, - { - "code": "const response = await client.inference.put({\n task_type: \"rerank\",\n inference_id: \"hugging-face-rerank\",\n inference_config: {\n service: \"hugging_face\",\n service_settings: {\n api_key: \"hugging-face-access-token\",\n url: \"url-endpoint\",\n },\n task_settings: {\n return_documents: true,\n top_n: 3,\n },\n },\n});", - "language": "JavaScript" - }, - { - "code": "response = client.inference.put(\n task_type: \"rerank\",\n inference_id: \"hugging-face-rerank\",\n body: {\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\",\n \"url\": \"url-endpoint\"\n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n }\n)", - "language": "Ruby" - }, - { - "code": "$resp = $client->inference()->put([\n \"task_type\" => \"rerank\",\n \"inference_id\" => \"hugging-face-rerank\",\n \"body\" => [\n \"service\" => \"hugging_face\",\n \"service_settings\" => [\n \"api_key\" => \"hugging-face-access-token\",\n \"url\" => \"url-endpoint\",\n ],\n \"task_settings\" => [\n \"return_documents\" => true,\n \"top_n\" => 3,\n ],\n ],\n]);", - "language": "PHP" - }, - { - "code": "curl -X PUT -H \"Authorization: ApiKey $ELASTIC_API_KEY\" -H \"Content-Type: application/json\" -d '{\"service\":\"hugging_face\",\"service_settings\":{\"api_key\":\"hugging-face-access-token\",\"url\":\"url-endpoint\"},\"task_settings\":{\"return_documents\":true,\"top_n\":3}}' \"$ELASTICSEARCH_URL/_inference/rerank/hugging-face-rerank\"", - "language": "curl" - }, - { - "code": "client.inference().put(p -> p\n .inferenceId(\"hugging-face-rerank\")\n .taskType(TaskType.Rerank)\n .inferenceConfig(i -> i\n .service(\"hugging_face\")\n .serviceSettings(JsonData.fromJson(\"{\\\"api_key\\\":\\\"hugging-face-access-token\\\",\\\"url\\\":\\\"url-endpoint\\\"}\"))\n .taskSettings(JsonData.fromJson(\"{\\\"return_documents\\\":true,\\\"top_n\\\":3}\"))\n )\n);\n", - "language": "Java" - } - ], - "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", - "method_request": "PUT _inference/rerank/hugging-face-rerank", - "summary": "A rerank task", - "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } }, "inherits": { @@ -170225,8 +170347,22 @@ } } ], - "query": [], - "specLocation": 
"inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L30-L93" }, { "kind": "response", @@ -170410,8 +170546,22 @@ } } ], - "query": [], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L30-L80" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L31-L88" }, { "kind": "response", @@ -170550,8 +170700,22 @@ } } ], - "query": [], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L29-L72" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L30-L80" }, { "kind": "response", @@ -170735,8 +170899,22 @@ } } ], - "query": [], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L30-L78" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L31-L86" }, { "kind": "response", @@ -170920,8 +171098,22 @@ } } ], - "query": [], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L30-L79" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L31-L87" }, { "kind": "response", @@ -171046,8 +171238,22 @@ } } ], - "query": [], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L28-L68" + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L29-L76" }, { "kind": "response", @@ -171162,70 +171368,6 @@ "method_request": "POST _inference/rerank/cohere_rerank", "summary": "Rerank task", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" - }, - "RerankRequestExample2": { - "alternatives": [ - { - "code": "resp = client.inference.rerank(\n inference_id=\"bge-reranker-base-mkn\",\n input=[\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\"\n ],\n query=\"star wars main character\",\n 
return_documents=False,\n top_n=2,\n)", - "language": "Python" - }, - { - "code": "const response = await client.inference.rerank({\n inference_id: \"bge-reranker-base-mkn\",\n input: [\"luke\", \"like\", \"leia\", \"chewy\", \"r2d2\", \"star\", \"wars\"],\n query: \"star wars main character\",\n return_documents: false,\n top_n: 2,\n});", - "language": "JavaScript" - }, - { - "code": "response = client.inference.rerank(\n inference_id: \"bge-reranker-base-mkn\",\n body: {\n \"input\": [\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\"\n ],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n }\n)", - "language": "Ruby" - }, - { - "code": "$resp = $client->inference()->rerank([\n \"inference_id\" => \"bge-reranker-base-mkn\",\n \"body\" => [\n \"input\" => array(\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\",\n ),\n \"query\" => \"star wars main character\",\n \"return_documents\" => false,\n \"top_n\" => 2,\n ],\n]);", - "language": "PHP" - }, - { - "code": "curl -X POST -H \"Authorization: ApiKey $ELASTIC_API_KEY\" -H \"Content-Type: application/json\" -d '{\"input\":[\"luke\",\"like\",\"leia\",\"chewy\",\"r2d2\",\"star\",\"wars\"],\"query\":\"star wars main character\",\"return_documents\":false,\"top_n\":2}' \"$ELASTICSEARCH_URL/_inference/rerank/bge-reranker-base-mkn\"", - "language": "curl" - }, - { - "code": "client.inference().rerank(r -> r\n .inferenceId(\"bge-reranker-base-mkn\")\n .input(List.of(\"luke\",\"like\",\"leia\",\"chewy\",\"r2d2\",\"star\",\"wars\"))\n .query(\"star wars main character\")\n);\n", - "language": "Java" - } - ], - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "method_request": "POST _inference/rerank/bge-reranker-base-mkn", - "summary": "Rerank task", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" - }, - "RerankRequestExample3": { - "alternatives": [ - { - "code": "resp = client.inference.rerank(\n inference_id=\"bge-reranker-base-mkn\",\n input=[\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\"\n ],\n query=\"star wars main character\",\n return_documents=True,\n top_n=3,\n)", - "language": "Python" - }, - { - "code": "const response = await client.inference.rerank({\n inference_id: \"bge-reranker-base-mkn\",\n input: [\"luke\", \"like\", \"leia\", \"chewy\", \"r2d2\", \"star\", \"wars\"],\n query: \"star wars main character\",\n return_documents: true,\n top_n: 3,\n});", - "language": "JavaScript" - }, - { - "code": "response = client.inference.rerank(\n inference_id: \"bge-reranker-base-mkn\",\n body: {\n \"input\": [\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\"\n ],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n }\n)", - "language": "Ruby" - }, - { - "code": "$resp = $client->inference()->rerank([\n \"inference_id\" => \"bge-reranker-base-mkn\",\n \"body\" => [\n \"input\" => array(\n \"luke\",\n \"like\",\n \"leia\",\n \"chewy\",\n \"r2d2\",\n \"star\",\n \"wars\",\n ),\n \"query\" => \"star wars main character\",\n \"return_documents\" => true,\n \"top_n\" => 3,\n ],\n]);", - "language": "PHP" - }, - { - "code": "curl -X POST -H \"Authorization: ApiKey $ELASTIC_API_KEY\" -H \"Content-Type: 
application/json\" -d '{\"input\":[\"luke\",\"like\",\"leia\",\"chewy\",\"r2d2\",\"star\",\"wars\"],\"query\":\"star wars main character\",\"return_documents\":true,\"top_n\":3}' \"$ELASTICSEARCH_URL/_inference/rerank/bge-reranker-base-mkn\"", - "language": "curl" - }, - { - "code": "client.inference().rerank(r -> r\n .inferenceId(\"bge-reranker-base-mkn\")\n .input(List.of(\"luke\",\"like\",\"leia\",\"chewy\",\"r2d2\",\"star\",\"wars\"))\n .query(\"star wars main character\")\n);\n", - "language": "Java" - } - ], - "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", - "method_request": "POST _inference/rerank/bge-reranker-base-mkn", - "summary": "Rerank task", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } }, "inherits": { @@ -171554,8 +171696,22 @@ } } ], - "query": [], - "specLocation": "inference/stream_completion/StreamInferenceRequest.ts#L24-L63" + "query": [ + { + "description": "The amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/stream_completion/StreamInferenceRequest.ts#L25-L71" }, { "kind": "response", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index b94babbf54..6557a0d99d 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13940,6 +13940,7 @@ export type InferenceInferenceResponse = InferenceInferenceResult export interface InferencePutRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id + timeout?: Duration body?: InferenceInferenceEndpoint } @@ -13948,6 +13949,7 @@ export type InferencePutResponse = InferenceInferenceEndpointInfo export interface InferencePutAlibabacloudRequest extends RequestBase { task_type: InferenceAlibabaCloudTaskType alibabacloud_inference_id: Id + timeout?: Duration body?: { chunking_settings?: InferenceInferenceChunkingSettings service: InferenceAlibabaCloudServiceType @@ -13961,6 +13963,7 @@ export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfoAli export interface InferencePutAmazonbedrockRequest extends RequestBase { task_type: InferenceAmazonBedrockTaskType amazonbedrock_inference_id: Id + timeout?: Duration body?: { chunking_settings?: InferenceInferenceChunkingSettings service: InferenceAmazonBedrockServiceType @@ -13974,6 +13977,7 @@ export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfoAm export interface InferencePutAnthropicRequest extends RequestBase { task_type: InferenceAnthropicTaskType anthropic_inference_id: Id + timeout?: Duration body?: { chunking_settings?: InferenceInferenceChunkingSettings service: InferenceAnthropicServiceType @@ -13987,6 +13991,7 @@ export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfoAnthro export interface InferencePutAzureaistudioRequest extends RequestBase { task_type: InferenceAzureAiStudioTaskType azureaistudio_inference_id: Id + timeout?: Duration body?: { chunking_settings?: InferenceInferenceChunkingSettings service: InferenceAzureAiStudioServiceType @@ -14000,6 +14005,7 @@ export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfoAz export interface InferencePutAzureopenaiRequest extends RequestBase { 
   task_type: InferenceAzureOpenAITaskType
   azureopenai_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceAzureOpenAIServiceType
@@ -14013,6 +14019,7 @@ export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfoAzur
 export interface InferencePutCohereRequest extends RequestBase {
   task_type: InferenceCohereTaskType
   cohere_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceCohereServiceType
@@ -14026,6 +14033,7 @@ export type InferencePutCohereResponse = InferenceInferenceEndpointInfoCohere
 export interface InferencePutElasticsearchRequest extends RequestBase {
   task_type: InferenceElasticsearchTaskType
   elasticsearch_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceElasticsearchServiceType
@@ -14039,6 +14047,7 @@ export type InferencePutElasticsearchResponse = InferenceInferenceEndpointInfoEl
 export interface InferencePutElserRequest extends RequestBase {
   task_type: InferenceElserTaskType
   elser_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceElserServiceType
@@ -14051,6 +14060,7 @@ export type InferencePutElserResponse = InferenceInferenceEndpointInfoELSER
 export interface InferencePutGoogleaistudioRequest extends RequestBase {
   task_type: InferenceGoogleAiStudioTaskType
   googleaistudio_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceGoogleAiServiceType
@@ -14063,6 +14073,7 @@ export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfoG
 export interface InferencePutGooglevertexaiRequest extends RequestBase {
   task_type: InferenceGoogleVertexAITaskType
   googlevertexai_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceGoogleVertexAIServiceType
@@ -14076,6 +14087,7 @@ export type InferencePutGooglevertexaiResponse = InferenceInferenceEndpointInfoG
 export interface InferencePutHuggingFaceRequest extends RequestBase {
   task_type: InferenceHuggingFaceTaskType
   huggingface_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceHuggingFaceServiceType
@@ -14088,6 +14100,7 @@ export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfoHugg
 export interface InferencePutJinaaiRequest extends RequestBase {
   task_type: InferenceJinaAITaskType
   jinaai_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceJinaAIServiceType
@@ -14101,6 +14114,7 @@ export type InferencePutJinaaiResponse = InferenceInferenceEndpointInfoJinaAi
 export interface InferencePutMistralRequest extends RequestBase {
   task_type: InferenceMistralTaskType
   mistral_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceMistralServiceType
@@ -14113,6 +14127,7 @@ export type InferencePutMistralResponse = InferenceInferenceEndpointInfoMistral
 export interface InferencePutOpenaiRequest extends RequestBase {
   task_type: InferenceOpenAITaskType
   openai_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceOpenAIServiceType
@@ -14126,6 +14141,7 @@ export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfoOpenAI
 export interface InferencePutVoyageaiRequest extends RequestBase {
   task_type: InferenceVoyageAITaskType
   voyageai_inference_id: Id
+  timeout?: Duration
   body?: {
     chunking_settings?: InferenceInferenceChunkingSettings
     service: InferenceVoyageAIServiceType
@@ -14139,6 +14155,7 @@ export type InferencePutVoyageaiResponse = InferenceInferenceEndpointInfoVoyageA
 export interface InferencePutWatsonxRequest extends RequestBase {
   task_type: InferenceWatsonxTaskType
   watsonx_inference_id: Id
+  timeout?: Duration
   body?: {
     service: InferenceWatsonxServiceType
     service_settings: InferenceWatsonxServiceSettings
@@ -14172,6 +14189,7 @@ export type InferenceSparseEmbeddingResponse = InferenceSparseEmbeddingInference
 export interface InferenceStreamCompletionRequest extends RequestBase {
   inference_id: Id
+  timeout?: Duration
   body?: {
     input: string | string[]
     task_settings?: InferenceTaskSettings
diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
index 36eb3e88cb..8f609d35e9 100644
--- a/specification/inference/put/PutRequest.ts
+++ b/specification/inference/put/PutRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import { InferenceEndpoint } from '@inference/_types/Services'
 import { TaskType } from '@inference/_types/TaskType'
 
@@ -73,6 +74,13 @@ export interface Request extends RequestBase {
      */
     inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   /** @codegen_name inference_config */
   body: InferenceEndpoint
 }
diff --git a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts
index f390250bbd..c725397056 100644
--- a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts
+++ b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   AlibabaCloudServiceSettings,
   AlibabaCloudServiceType,
@@ -54,6 +55,13 @@ export interface Request extends RequestBase {
      */
     alibabacloud_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
index 0420c26263..61927ce3bf 100644
--- a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
+++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   AmazonBedrockServiceSettings,
   AmazonBedrockServiceType,
@@ -57,6 +58,13 @@ export interface Request extends RequestBase {
      */
     amazonbedrock_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
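
Note: every PUT inference route in this patch gains the same optional `timeout` query
parameter (server default 30s). A minimal usage sketch, reusing the hugging_face example
body that already appears above; the inference id and the 60s value are illustrative only:

    PUT _inference/text_embedding/hugging-face-embeddings?timeout=60s
    {
      "service": "hugging_face",
      "service_settings": {
        "api_key": "hugging-face-access-token",
        "url": "url-endpoint"
      }
    }
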
diff --git a/specification/inference/put_anthropic/PutAnthropicRequest.ts b/specification/inference/put_anthropic/PutAnthropicRequest.ts
index d0e0b87ed3..d7942495c3 100644
--- a/specification/inference/put_anthropic/PutAnthropicRequest.ts
+++ b/specification/inference/put_anthropic/PutAnthropicRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   AnthropicServiceSettings,
   AnthropicServiceType,
@@ -55,6 +56,13 @@ export interface Request extends RequestBase {
      */
     anthropic_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts
index d09f31b75c..6ab0d8b029 100644
--- a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts
+++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   AzureAiStudioServiceSettings,
   AzureAiStudioServiceType,
@@ -54,6 +55,13 @@ export interface Request extends RequestBase {
      */
     azureaistudio_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts
index 63f0c42a8b..af881b68ef 100644
--- a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts
+++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   AzureOpenAIServiceSettings,
   AzureOpenAIServiceType,
@@ -62,6 +63,13 @@ export interface Request extends RequestBase {
      */
     azureopenai_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_cohere/PutCohereRequest.ts b/specification/inference/put_cohere/PutCohereRequest.ts
index 52ddd382e7..6ebfc47292 100644
--- a/specification/inference/put_cohere/PutCohereRequest.ts
+++ b/specification/inference/put_cohere/PutCohereRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   CohereServiceSettings,
   CohereServiceType,
@@ -54,6 +55,13 @@ export interface Request extends RequestBase {
      */
     cohere_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
index bdf2a8d991..b57b062f67 100644
--- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
+++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   ElasticsearchServiceSettings,
   ElasticsearchServiceType,
@@ -68,6 +69,13 @@ export interface Request extends RequestBase {
      */
     elasticsearch_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_elser/PutElserRequest.ts b/specification/inference/put_elser/PutElserRequest.ts
index d9a4812243..179bb1e379 100644
--- a/specification/inference/put_elser/PutElserRequest.ts
+++ b/specification/inference/put_elser/PutElserRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   ElserServiceSettings,
   ElserServiceType,
@@ -68,6 +69,13 @@ export interface Request extends RequestBase {
      */
     elser_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts
index 691710a32e..6871ceb750 100644
--- a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts
+++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   GoogleAiServiceType,
   GoogleAiStudioServiceSettings,
@@ -53,6 +54,13 @@ export interface Request extends RequestBase {
      */
     googleaistudio_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts
index 40a65dbb56..80cf04059e 100644
--- a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts
+++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   GoogleVertexAIServiceSettings,
   GoogleVertexAIServiceType,
@@ -54,6 +55,13 @@ export interface Request extends RequestBase {
      */
     googlevertexai_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts
index 9c2b4855a7..1fa3113891 100644
--- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts
+++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   HuggingFaceServiceSettings,
   HuggingFaceServiceType,
@@ -67,6 +68,13 @@ export interface Request extends RequestBase {
      */
     huggingface_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_jinaai/PutJinaAiRequest.ts b/specification/inference/put_jinaai/PutJinaAiRequest.ts
index c34b80d4a4..6685c2c874 100644
--- a/specification/inference/put_jinaai/PutJinaAiRequest.ts
+++ b/specification/inference/put_jinaai/PutJinaAiRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   JinaAIServiceSettings,
   JinaAIServiceType,
@@ -57,6 +58,13 @@ export interface Request extends RequestBase {
      */
     jinaai_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_mistral/PutMistralRequest.ts b/specification/inference/put_mistral/PutMistralRequest.ts
index 0bc9713a87..e7f22db73a 100644
--- a/specification/inference/put_mistral/PutMistralRequest.ts
+++ b/specification/inference/put_mistral/PutMistralRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   MistralServiceSettings,
   MistralServiceType,
@@ -54,6 +55,13 @@ export interface Request extends RequestBase {
      */
     mistral_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts
index d322b89c36..b6bb675f46 100644
--- a/specification/inference/put_openai/PutOpenAiRequest.ts
+++ b/specification/inference/put_openai/PutOpenAiRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   OpenAIServiceSettings,
   OpenAIServiceType,
@@ -55,6 +56,13 @@ export interface Request extends RequestBase {
      */
     openai_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_voyageai/PutVoyageAIRequest.ts b/specification/inference/put_voyageai/PutVoyageAIRequest.ts
index 9d1c2e0006..2bb4b7ef46 100644
--- a/specification/inference/put_voyageai/PutVoyageAIRequest.ts
+++ b/specification/inference/put_voyageai/PutVoyageAIRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   VoyageAIServiceSettings,
   VoyageAIServiceType,
@@ -56,6 +57,13 @@ export interface Request extends RequestBase {
      */
     voyageai_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The chunking configuration object.
diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts
index f7f80f5a81..4e51746776 100644
--- a/specification/inference/put_watsonx/PutWatsonxRequest.ts
+++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import {
   WatsonxServiceSettings,
   WatsonxServiceType,
@@ -55,6 +56,13 @@ export interface Request extends RequestBase {
      */
     watsonx_inference_id: Id
   }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The type of service supported for the specified task type. In this case, `watsonxai`.
diff --git a/specification/inference/stream_completion/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts
index 1d2c83bee9..0e08af6a6f 100644
--- a/specification/inference/stream_completion/StreamInferenceRequest.ts
+++ b/specification/inference/stream_completion/StreamInferenceRequest.ts
@@ -19,6 +19,7 @@
 
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
 import { TaskSettings } from '@inference/_types/Services'
 
 /**
@@ -47,6 +48,13 @@ export interface Request extends RequestBase {
      */
     inference_id: Id
   }
+  query_parameters: {
+    /**
+     * The amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
   body: {
     /**
      * The text on which you want to perform the inference task.
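
Note: the stream endpoint takes the same optional `timeout` query parameter, but here it
bounds the inference request itself rather than endpoint creation (server default 30s).
A sketch, assuming a hypothetical completion endpoint named `openai-completion`:

    POST _inference/completion/openai-completion/_stream?timeout=60s
    {
      "input": "What is Elastic?"
    }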