diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 384c07454e..4fc448ff3e 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17451,10 +17451,11 @@ "content": { "application/json": { "schema": { - "type": "object" + "$ref": "#/components/schemas/inference._types:RequestChatCompletionBase" } } - } + }, + "required": true }, "responses": { "200": { @@ -17742,10 +17743,11 @@ "content": { "application/json": { "schema": { - "type": "object" + "$ref": "#/components/schemas/inference._types:RequestChatCompletionBase" } } - } + }, + "required": true }, "responses": { "200": { @@ -17778,7 +17780,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskType" + "$ref": "#/components/schemas/inference._types:AlibabaCloudTaskType" }, "style": "simple" }, @@ -17804,13 +17806,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_alibabacloud:ServiceType" + "$ref": "#/components/schemas/inference._types:AlibabaCloudServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudServiceSettings" + "$ref": "#/components/schemas/inference._types:AlibabaCloudServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskSettings" + "$ref": "#/components/schemas/inference._types:AlibabaCloudTaskSettings" } }, "required": [ @@ -17874,7 +17876,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType" + "$ref": "#/components/schemas/inference._types:AmazonBedrockTaskType" }, "style": "simple" }, @@ -17900,13 +17902,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType" + "$ref": "#/components/schemas/inference._types:AmazonBedrockServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings" + "$ref": "#/components/schemas/inference._types:AmazonBedrockServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings" + "$ref": "#/components/schemas/inference._types:AmazonBedrockTaskSettings" } }, "required": [ @@ -17960,7 +17962,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskType" + "$ref": "#/components/schemas/inference._types:AnthropicTaskType" }, "style": "simple" }, @@ -17986,13 +17988,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_anthropic:ServiceType" + "$ref": "#/components/schemas/inference._types:AnthropicServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_anthropic:AnthropicServiceSettings" + "$ref": "#/components/schemas/inference._types:AnthropicServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskSettings" + "$ref": "#/components/schemas/inference._types:AnthropicTaskSettings" } }, "required": [ @@ -18040,7 +18042,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + "$ref": 
"#/components/schemas/inference._types:AzureAiStudioTaskType" }, "style": "simple" }, @@ -18066,13 +18068,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + "$ref": "#/components/schemas/inference._types:AzureAiStudioServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + "$ref": "#/components/schemas/inference._types:AzureAiStudioServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + "$ref": "#/components/schemas/inference._types:AzureAiStudioTaskSettings" } }, "required": [ @@ -18126,7 +18128,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + "$ref": "#/components/schemas/inference._types:AzureOpenAITaskType" }, "style": "simple" }, @@ -18152,13 +18154,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + "$ref": "#/components/schemas/inference._types:AzureOpenAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + "$ref": "#/components/schemas/inference._types:AzureOpenAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + "$ref": "#/components/schemas/inference._types:AzureOpenAITaskSettings" } }, "required": [ @@ -18212,7 +18214,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_cohere:CohereTaskType" + "$ref": "#/components/schemas/inference._types:CohereTaskType" }, "style": "simple" }, @@ -18238,13 +18240,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_cohere:ServiceType" + "$ref": "#/components/schemas/inference._types:CohereServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_cohere:CohereServiceSettings" + "$ref": "#/components/schemas/inference._types:CohereServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_cohere:CohereTaskSettings" + "$ref": "#/components/schemas/inference._types:CohereTaskSettings" } }, "required": [ @@ -18298,7 +18300,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_eis:EisTaskType" + "$ref": "#/components/schemas/inference._types:EisTaskType" }, "style": "simple" }, @@ -18321,10 +18323,10 @@ "type": "object", "properties": { "service": { - "$ref": "#/components/schemas/inference.put_eis:ServiceType" + "$ref": "#/components/schemas/inference._types:EisServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + "$ref": "#/components/schemas/inference._types:EisServiceSettings" } }, "required": [ @@ -18366,7 +18368,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskType" + "$ref": "#/components/schemas/inference._types:ElasticsearchTaskType" }, "style": "simple" }, @@ -18392,13 +18394,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ServiceType" + "$ref": 
"#/components/schemas/inference._types:ElasticsearchServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchServiceSettings" + "$ref": "#/components/schemas/inference._types:ElasticsearchServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskSettings" + "$ref": "#/components/schemas/inference._types:ElasticsearchTaskSettings" } }, "required": [ @@ -18478,7 +18480,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_elser:ElserTaskType" + "$ref": "#/components/schemas/inference._types:ElserTaskType" }, "style": "simple" }, @@ -18504,10 +18506,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_elser:ServiceType" + "$ref": "#/components/schemas/inference._types:ElserServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_elser:ElserServiceSettings" + "$ref": "#/components/schemas/inference._types:ElserServiceSettings" } }, "required": [ @@ -18568,7 +18570,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioTaskType" + "$ref": "#/components/schemas/inference._types:GoogleAiStudioTaskType" }, "style": "simple" }, @@ -18594,10 +18596,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_googleaistudio:ServiceType" + "$ref": "#/components/schemas/inference._types:GoogleAiServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioServiceSettings" + "$ref": "#/components/schemas/inference._types:GoogleAiStudioServiceSettings" } }, "required": [ @@ -18646,7 +18648,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskType" + "$ref": "#/components/schemas/inference._types:GoogleVertexAITaskType" }, "style": "simple" }, @@ -18672,13 +18674,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_googlevertexai:ServiceType" + "$ref": "#/components/schemas/inference._types:GoogleVertexAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAIServiceSettings" + "$ref": "#/components/schemas/inference._types:GoogleVertexAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskSettings" + "$ref": "#/components/schemas/inference._types:GoogleVertexAITaskSettings" } }, "required": [ @@ -18732,7 +18734,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + "$ref": "#/components/schemas/inference._types:HuggingFaceTaskType" }, "style": "simple" }, @@ -18758,10 +18760,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + "$ref": "#/components/schemas/inference._types:HuggingFaceServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + "$ref": "#/components/schemas/inference._types:HuggingFaceServiceSettings" } }, "required": [ @@ -18810,7 +18812,7 @@ "required": true, 
"deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskType" + "$ref": "#/components/schemas/inference._types:JinaAITaskType" }, "style": "simple" }, @@ -18836,13 +18838,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_jinaai:ServiceType" + "$ref": "#/components/schemas/inference._types:JinaAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAIServiceSettings" + "$ref": "#/components/schemas/inference._types:JinaAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskSettings" + "$ref": "#/components/schemas/inference._types:JinaAITaskSettings" } }, "required": [ @@ -18896,7 +18898,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_mistral:MistralTaskType" + "$ref": "#/components/schemas/inference._types:MistralTaskType" }, "style": "simple" }, @@ -18922,10 +18924,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_mistral:ServiceType" + "$ref": "#/components/schemas/inference._types:MistralServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_mistral:MistralServiceSettings" + "$ref": "#/components/schemas/inference._types:MistralServiceSettings" } }, "required": [ @@ -18973,7 +18975,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_openai:OpenAITaskType" + "$ref": "#/components/schemas/inference._types:OpenAITaskType" }, "style": "simple" }, @@ -18999,13 +19001,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_openai:ServiceType" + "$ref": "#/components/schemas/inference._types:OpenAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_openai:OpenAIServiceSettings" + "$ref": "#/components/schemas/inference._types:OpenAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_openai:OpenAITaskSettings" + "$ref": "#/components/schemas/inference._types:OpenAITaskSettings" } }, "required": [ @@ -19059,7 +19061,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskType" + "$ref": "#/components/schemas/inference._types:VoyageAITaskType" }, "style": "simple" }, @@ -19085,13 +19087,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_voyageai:ServiceType" + "$ref": "#/components/schemas/inference._types:VoyageAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAIServiceSettings" + "$ref": "#/components/schemas/inference._types:VoyageAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskSettings" + "$ref": "#/components/schemas/inference._types:VoyageAITaskSettings" } }, "required": [ @@ -19145,7 +19147,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + "$ref": "#/components/schemas/inference._types:WatsonxTaskType" }, "style": "simple" }, @@ -19168,10 +19170,10 @@ "type": "object", "properties": { "service": { - "$ref": 
"#/components/schemas/inference.put_watsonx:ServiceType" + "$ref": "#/components/schemas/inference._types:WatsonxServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + "$ref": "#/components/schemas/inference._types:WatsonxServiceSettings" } }, "required": [ @@ -77993,6 +77995,234 @@ "valid" ] }, + "inference._types:RequestChatCompletionBase": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RequestBase" + }, + { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + "type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference._types:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + ] + }, + "inference._types:Message": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/inference._types:MessageContent" + }, + "role": { + "description": "The role of the message author.", + "type": "string" + }, + "tool_call_id": { + "$ref": "#/components/schemas/_types:Id" + }, + "tool_calls": { + "description": "The tool calls generated by the model.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:ToolCall" + } + } + }, + "required": [ + "role" + ] + }, + "inference._types:MessageContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:ContentObject" + } + } + ] + }, + "inference._types:ContentObject": { + "type": "object", + "properties": { + "text": { + "description": "The text content.", + "type": "string" + }, + "type": { + "description": "The type of content.", + "type": "string" + } + }, + "required": [ + "text", + "type" + ] + }, + "inference._types:ToolCall": { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/_types:Id" + }, + "function": { + "$ref": "#/components/schemas/inference._types:ToolCallFunction" + }, + "type": { + "description": "The type of the tool call.", + "type": "string" + } + }, + "required": [ + "id", + "function", + "type" + ] + }, + "inference._types:ToolCallFunction": { + "type": "object", + "properties": { + "arguments": { + "description": "The arguments to call the function with in JSON format.", + "type": "string" + }, + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "arguments", + "name" + ] + }, + "inference._types:CompletionToolType": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/inference._types:CompletionToolChoice" + } + ] + }, + 
"inference._types:CompletionToolChoice": { + "type": "object", + "properties": { + "type": { + "description": "The type of the tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference._types:CompletionToolChoiceFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference._types:CompletionToolChoiceFunction": { + "type": "object", + "properties": { + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "inference._types:CompletionTool": { + "type": "object", + "properties": { + "type": { + "description": "The type of tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference._types:CompletionToolFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference._types:CompletionToolFunction": { + "type": "object", + "properties": { + "description": { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "type": "string" + }, + "name": { + "description": "The name of the function.", + "type": "string" + }, + "parameters": { + "description": "The parameters the functional accepts. This should be formatted as a JSON object.", + "type": "object" + }, + "strict": { + "description": "Whether to enable schema adherence when generating the function call.", + "type": "boolean" + } + }, + "required": [ + "name" + ] + }, + "_types:RequestBase": { + "type": "object" + }, "_types:StreamResult": { "type": "object" }, @@ -78124,7 +78354,7 @@ "inference._types:ServiceSettings": { "type": "object" }, - "inference.put_alibabacloud:AlibabaCloudTaskType": { + "inference._types:AlibabaCloudTaskType": { "type": "string", "enum": [ "completion", @@ -78133,13 +78363,13 @@ "text_embedding" ] }, - "inference.put_alibabacloud:ServiceType": { + "inference._types:AlibabaCloudServiceType": { "type": "string", "enum": [ "alibabacloud-ai-search" ] }, - "inference.put_alibabacloud:AlibabaCloudServiceSettings": { + "inference._types:AlibabaCloudServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78181,7 +78411,7 @@ } } }, - "inference.put_alibabacloud:AlibabaCloudTaskSettings": { + "inference._types:AlibabaCloudTaskSettings": { "type": "object", "properties": { "input_type": { @@ -78194,20 +78424,20 @@ } } }, - "inference.put_amazonbedrock:AmazonBedrockTaskType": { + "inference._types:AmazonBedrockTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_amazonbedrock:ServiceType": { + "inference._types:AmazonBedrockServiceType": { "type": "string", "enum": [ "amazonbedrock" ] }, - "inference.put_amazonbedrock:AmazonBedrockServiceSettings": { + "inference._types:AmazonBedrockServiceSettings": { "type": "object", "properties": { "access_key": { @@ -78250,7 +78480,7 @@ "secret_key" ] }, - "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { + "inference._types:AmazonBedrockTaskSettings": { "type": "object", "properties": { "max_new_tokens": { @@ -78271,19 +78501,19 @@ } } }, - "inference.put_anthropic:AnthropicTaskType": { + "inference._types:AnthropicTaskType": { "type": "string", "enum": [ "completion" ] }, - "inference.put_anthropic:ServiceType": { + "inference._types:AnthropicServiceType": { "type": "string", "enum": [ "anthropic" ] }, - "inference.put_anthropic:AnthropicServiceSettings": { + "inference._types:AnthropicServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78303,7 +78533,7 @@ 
"model_id" ] }, - "inference.put_anthropic:AnthropicTaskSettings": { + "inference._types:AnthropicTaskSettings": { "type": "object", "properties": { "max_tokens": { @@ -78330,20 +78560,20 @@ "max_tokens" ] }, - "inference.put_azureaistudio:AzureAiStudioTaskType": { + "inference._types:AzureAiStudioTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_azureaistudio:ServiceType": { + "inference._types:AzureAiStudioServiceType": { "type": "string", "enum": [ "azureaistudio" ] }, - "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "inference._types:AzureAiStudioServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78379,7 +78609,7 @@ "provider" ] }, - "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "inference._types:AzureAiStudioTaskSettings": { "type": "object", "properties": { "do_sample": { @@ -78404,20 +78634,20 @@ } } }, - "inference.put_azureopenai:AzureOpenAITaskType": { + "inference._types:AzureOpenAITaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_azureopenai:ServiceType": { + "inference._types:AzureOpenAIServiceType": { "type": "string", "enum": [ "azureopenai" ] }, - "inference.put_azureopenai:AzureOpenAIServiceSettings": { + "inference._types:AzureOpenAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78462,7 +78692,7 @@ "resource_name" ] }, - "inference.put_azureopenai:AzureOpenAITaskSettings": { + "inference._types:AzureOpenAITaskSettings": { "type": "object", "properties": { "user": { @@ -78471,7 +78701,7 @@ } } }, - "inference.put_cohere:CohereTaskType": { + "inference._types:CohereTaskType": { "type": "string", "enum": [ "completion", @@ -78479,13 +78709,13 @@ "text_embedding" ] }, - "inference.put_cohere:ServiceType": { + "inference._types:CohereServiceType": { "type": "string", "enum": [ "cohere" ] }, - "inference.put_cohere:CohereServiceSettings": { + "inference._types:CohereServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78496,7 +78726,7 @@ "type": "string" }, "embedding_type": { - "$ref": "#/components/schemas/inference.put_cohere:EmbeddingType" + "$ref": "#/components/schemas/inference._types:CohereEmbeddingType" }, "model_id": { "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", @@ -78506,14 +78736,14 @@ "$ref": "#/components/schemas/inference._types:RateLimitSetting" }, "similarity": { - "$ref": "#/components/schemas/inference.put_cohere:SimilarityType" + "$ref": "#/components/schemas/inference._types:CohereSimilarityType" } }, "required": [ "api_key" ] }, - "inference.put_cohere:EmbeddingType": { + "inference._types:CohereEmbeddingType": { "type": "string", "enum": [ "byte", @@ -78521,7 +78751,7 @@ "int8" ] }, - "inference.put_cohere:SimilarityType": { + "inference._types:CohereSimilarityType": { "type": "string", "enum": [ "cosine", @@ -78529,11 +78759,11 @@ "l2_norm" ] }, - "inference.put_cohere:CohereTaskSettings": { + "inference._types:CohereTaskSettings": { "type": "object", "properties": { 
"input_type": { - "$ref": "#/components/schemas/inference.put_cohere:InputType" + "$ref": "#/components/schemas/inference._types:CohereInputType" }, "return_documents": { "description": "For a `rerank` task, return doc text within the results.", @@ -78544,11 +78774,11 @@ "type": "number" }, "truncate": { - "$ref": "#/components/schemas/inference.put_cohere:TruncateType" + "$ref": "#/components/schemas/inference._types:CohereTruncateType" } } }, - "inference.put_cohere:InputType": { + "inference._types:CohereInputType": { "type": "string", "enum": [ "classification", @@ -78557,7 +78787,7 @@ "search" ] }, - "inference.put_cohere:TruncateType": { + "inference._types:CohereTruncateType": { "type": "string", "enum": [ "END", @@ -78565,19 +78795,19 @@ "START" ] }, - "inference.put_eis:EisTaskType": { + "inference._types:EisTaskType": { "type": "string", "enum": [ "chat_completion" ] }, - "inference.put_eis:ServiceType": { + "inference._types:EisServiceType": { "type": "string", "enum": [ "elastic" ] }, - "inference.put_eis:EisServiceSettings": { + "inference._types:EisServiceSettings": { "type": "object", "properties": { "model_id": { @@ -78592,7 +78822,7 @@ "model_id" ] }, - "inference.put_elasticsearch:ElasticsearchTaskType": { + "inference._types:ElasticsearchTaskType": { "type": "string", "enum": [ "rerank", @@ -78600,13 +78830,13 @@ "text_embedding" ] }, - "inference.put_elasticsearch:ServiceType": { + "inference._types:ElasticsearchServiceType": { "type": "string", "enum": [ "elasticsearch" ] }, - "inference.put_elasticsearch:ElasticsearchServiceSettings": { + "inference._types:ElasticsearchServiceSettings": { "type": "object", "properties": { "adaptive_allocations": { @@ -78654,7 +78884,7 @@ } } }, - "inference.put_elasticsearch:ElasticsearchTaskSettings": { + "inference._types:ElasticsearchTaskSettings": { "type": "object", "properties": { "return_documents": { @@ -78663,19 +78893,19 @@ } } }, - "inference.put_elser:ElserTaskType": { + "inference._types:ElserTaskType": { "type": "string", "enum": [ "sparse_embedding" ] }, - "inference.put_elser:ServiceType": { + "inference._types:ElserServiceType": { "type": "string", "enum": [ "elser" ] }, - "inference.put_elser:ElserServiceSettings": { + "inference._types:ElserServiceSettings": { "type": "object", "properties": { "adaptive_allocations": { @@ -78695,20 +78925,20 @@ "num_threads" ] }, - "inference.put_googleaistudio:GoogleAiStudioTaskType": { + "inference._types:GoogleAiStudioTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_googleaistudio:ServiceType": { + "inference._types:GoogleAiServiceType": { "type": "string", "enum": [ "googleaistudio" ] }, - "inference.put_googleaistudio:GoogleAiStudioServiceSettings": { + "inference._types:GoogleAiStudioServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78731,20 +78961,20 @@ "model_id" ] }, - "inference.put_googlevertexai:GoogleVertexAITaskType": { + "inference._types:GoogleVertexAITaskType": { "type": "string", "enum": [ "rerank", "text_embedding" ] }, - "inference.put_googlevertexai:ServiceType": { + "inference._types:GoogleVertexAIServiceType": { "type": "string", "enum": [ "googlevertexai" ] }, - "inference.put_googlevertexai:GoogleVertexAIServiceSettings": { + "inference._types:GoogleVertexAIServiceSettings": { "type": "object", "properties": { "location": { @@ -78780,7 +79010,7 @@ "service_account_json" ] }, - "inference.put_googlevertexai:GoogleVertexAITaskSettings": { + "inference._types:GoogleVertexAITaskSettings": { 
"type": "object", "properties": { "auto_truncate": { @@ -78793,19 +79023,19 @@ } } }, - "inference.put_hugging_face:HuggingFaceTaskType": { + "inference._types:HuggingFaceTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_hugging_face:ServiceType": { + "inference._types:HuggingFaceServiceType": { "type": "string", "enum": [ "hugging_face" ] }, - "inference.put_hugging_face:HuggingFaceServiceSettings": { + "inference._types:HuggingFaceServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78828,20 +79058,20 @@ "url" ] }, - "inference.put_jinaai:JinaAITaskType": { + "inference._types:JinaAITaskType": { "type": "string", "enum": [ "rerank", "text_embedding" ] }, - "inference.put_jinaai:ServiceType": { + "inference._types:JinaAIServiceType": { "type": "string", "enum": [ "jinaai" ] }, - "inference.put_jinaai:JinaAIServiceSettings": { + "inference._types:JinaAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78859,14 +79089,14 @@ "$ref": "#/components/schemas/inference._types:RateLimitSetting" }, "similarity": { - "$ref": "#/components/schemas/inference.put_jinaai:SimilarityType" + "$ref": "#/components/schemas/inference._types:JinaAISimilarityType" } }, "required": [ "api_key" ] }, - "inference.put_jinaai:SimilarityType": { + "inference._types:JinaAISimilarityType": { "type": "string", "enum": [ "cosine", @@ -78874,7 +79104,7 @@ "l2_norm" ] }, - "inference.put_jinaai:JinaAITaskSettings": { + "inference._types:JinaAITaskSettings": { "type": "object", "properties": { "return_documents": { @@ -78882,7 +79112,7 @@ "type": "boolean" }, "task": { - "$ref": "#/components/schemas/inference.put_jinaai:TextEmbeddingTask" + "$ref": "#/components/schemas/inference._types:JinaAITextEmbeddingTask" }, "top_n": { "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", @@ -78890,7 +79120,7 @@ } } }, - "inference.put_jinaai:TextEmbeddingTask": { + "inference._types:JinaAITextEmbeddingTask": { "type": "string", "enum": [ "classification", @@ -78899,19 +79129,19 @@ "search" ] }, - "inference.put_mistral:MistralTaskType": { + "inference._types:MistralTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_mistral:ServiceType": { + "inference._types:MistralServiceType": { "type": "string", "enum": [ "mistral" ] }, - "inference.put_mistral:MistralServiceSettings": { + "inference._types:MistralServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78941,7 +79171,7 @@ "model" ] }, - "inference.put_openai:OpenAITaskType": { + "inference._types:OpenAITaskType": { "type": "string", "enum": [ "chat_completion", @@ -78949,13 +79179,13 @@ "text_embedding" ] }, - "inference.put_openai:ServiceType": { + "inference._types:OpenAIServiceType": { "type": "string", "enum": [ "openai" ] }, - "inference.put_openai:OpenAIServiceSettings": { + "inference._types:OpenAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -78993,7 +79223,7 @@ "model_id" ] }, - "inference.put_openai:OpenAITaskSettings": { + "inference._types:OpenAITaskSettings": { "type": "object", "properties": { "user": { @@ -79002,20 +79232,20 @@ } } }, - "inference.put_voyageai:VoyageAITaskType": { + "inference._types:VoyageAITaskType": { "type": "string", "enum": [ "text_embedding", "rerank" ] }, - 
"inference.put_voyageai:ServiceType": { + "inference._types:VoyageAIServiceType": { "type": "string", "enum": [ "voyageai" ] }, - "inference.put_voyageai:VoyageAIServiceSettings": { + "inference._types:VoyageAIServiceSettings": { "type": "object", "properties": { "dimensions": { @@ -79047,7 +79277,7 @@ "model_id" ] }, - "inference.put_voyageai:VoyageAITaskSettings": { + "inference._types:VoyageAITaskSettings": { "type": "object", "properties": { "input_type": { @@ -79068,19 +79298,19 @@ } } }, - "inference.put_watsonx:WatsonxTaskType": { + "inference._types:WatsonxTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_watsonx:ServiceType": { + "inference._types:WatsonxServiceType": { "type": "string", "enum": [ "watsonxai" ] }, - "inference.put_watsonx:WatsonxServiceSettings": { + "inference._types:WatsonxServiceSettings": { "type": "object", "properties": { "api_key": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 31848e3387..61a0b6bbfa 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9273,10 +9273,11 @@ "content": { "application/json": { "schema": { - "type": "object" + "$ref": "#/components/schemas/inference._types:RequestChatCompletionBase" } } - } + }, + "required": true }, "responses": { "200": { @@ -9564,10 +9565,11 @@ "content": { "application/json": { "schema": { - "type": "object" + "$ref": "#/components/schemas/inference._types:RequestChatCompletionBase" } } - } + }, + "required": true }, "responses": { "200": { @@ -9600,7 +9602,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskType" + "$ref": "#/components/schemas/inference._types:AlibabaCloudTaskType" }, "style": "simple" }, @@ -9626,13 +9628,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_alibabacloud:ServiceType" + "$ref": "#/components/schemas/inference._types:AlibabaCloudServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudServiceSettings" + "$ref": "#/components/schemas/inference._types:AlibabaCloudServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskSettings" + "$ref": "#/components/schemas/inference._types:AlibabaCloudTaskSettings" } }, "required": [ @@ -9696,7 +9698,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType" + "$ref": "#/components/schemas/inference._types:AmazonBedrockTaskType" }, "style": "simple" }, @@ -9722,13 +9724,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType" + "$ref": "#/components/schemas/inference._types:AmazonBedrockServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings" + "$ref": "#/components/schemas/inference._types:AmazonBedrockServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings" + "$ref": "#/components/schemas/inference._types:AmazonBedrockTaskSettings" } }, "required": [ @@ -9782,7 +9784,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": 
"#/components/schemas/inference.put_anthropic:AnthropicTaskType" + "$ref": "#/components/schemas/inference._types:AnthropicTaskType" }, "style": "simple" }, @@ -9808,13 +9810,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_anthropic:ServiceType" + "$ref": "#/components/schemas/inference._types:AnthropicServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_anthropic:AnthropicServiceSettings" + "$ref": "#/components/schemas/inference._types:AnthropicServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskSettings" + "$ref": "#/components/schemas/inference._types:AnthropicTaskSettings" } }, "required": [ @@ -9862,7 +9864,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + "$ref": "#/components/schemas/inference._types:AzureAiStudioTaskType" }, "style": "simple" }, @@ -9888,13 +9890,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + "$ref": "#/components/schemas/inference._types:AzureAiStudioServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + "$ref": "#/components/schemas/inference._types:AzureAiStudioServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + "$ref": "#/components/schemas/inference._types:AzureAiStudioTaskSettings" } }, "required": [ @@ -9948,7 +9950,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskType" + "$ref": "#/components/schemas/inference._types:AzureOpenAITaskType" }, "style": "simple" }, @@ -9974,13 +9976,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_azureopenai:ServiceType" + "$ref": "#/components/schemas/inference._types:AzureOpenAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAIServiceSettings" + "$ref": "#/components/schemas/inference._types:AzureOpenAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_azureopenai:AzureOpenAITaskSettings" + "$ref": "#/components/schemas/inference._types:AzureOpenAITaskSettings" } }, "required": [ @@ -10034,7 +10036,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_cohere:CohereTaskType" + "$ref": "#/components/schemas/inference._types:CohereTaskType" }, "style": "simple" }, @@ -10060,13 +10062,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_cohere:ServiceType" + "$ref": "#/components/schemas/inference._types:CohereServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_cohere:CohereServiceSettings" + "$ref": "#/components/schemas/inference._types:CohereServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_cohere:CohereTaskSettings" + "$ref": "#/components/schemas/inference._types:CohereTaskSettings" } }, "required": [ @@ -10120,7 +10122,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": 
"#/components/schemas/inference.put_eis:EisTaskType" + "$ref": "#/components/schemas/inference._types:EisTaskType" }, "style": "simple" }, @@ -10143,10 +10145,10 @@ "type": "object", "properties": { "service": { - "$ref": "#/components/schemas/inference.put_eis:ServiceType" + "$ref": "#/components/schemas/inference._types:EisServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + "$ref": "#/components/schemas/inference._types:EisServiceSettings" } }, "required": [ @@ -10188,7 +10190,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskType" + "$ref": "#/components/schemas/inference._types:ElasticsearchTaskType" }, "style": "simple" }, @@ -10214,13 +10216,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ServiceType" + "$ref": "#/components/schemas/inference._types:ElasticsearchServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchServiceSettings" + "$ref": "#/components/schemas/inference._types:ElasticsearchServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskSettings" + "$ref": "#/components/schemas/inference._types:ElasticsearchTaskSettings" } }, "required": [ @@ -10300,7 +10302,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_elser:ElserTaskType" + "$ref": "#/components/schemas/inference._types:ElserTaskType" }, "style": "simple" }, @@ -10326,10 +10328,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_elser:ServiceType" + "$ref": "#/components/schemas/inference._types:ElserServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_elser:ElserServiceSettings" + "$ref": "#/components/schemas/inference._types:ElserServiceSettings" } }, "required": [ @@ -10390,7 +10392,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioTaskType" + "$ref": "#/components/schemas/inference._types:GoogleAiStudioTaskType" }, "style": "simple" }, @@ -10416,10 +10418,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_googleaistudio:ServiceType" + "$ref": "#/components/schemas/inference._types:GoogleAiServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioServiceSettings" + "$ref": "#/components/schemas/inference._types:GoogleAiStudioServiceSettings" } }, "required": [ @@ -10468,7 +10470,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskType" + "$ref": "#/components/schemas/inference._types:GoogleVertexAITaskType" }, "style": "simple" }, @@ -10494,13 +10496,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_googlevertexai:ServiceType" + "$ref": "#/components/schemas/inference._types:GoogleVertexAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAIServiceSettings" + "$ref": 
"#/components/schemas/inference._types:GoogleVertexAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskSettings" + "$ref": "#/components/schemas/inference._types:GoogleVertexAITaskSettings" } }, "required": [ @@ -10554,7 +10556,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceTaskType" + "$ref": "#/components/schemas/inference._types:HuggingFaceTaskType" }, "style": "simple" }, @@ -10580,10 +10582,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_hugging_face:ServiceType" + "$ref": "#/components/schemas/inference._types:HuggingFaceServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_hugging_face:HuggingFaceServiceSettings" + "$ref": "#/components/schemas/inference._types:HuggingFaceServiceSettings" } }, "required": [ @@ -10632,7 +10634,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskType" + "$ref": "#/components/schemas/inference._types:JinaAITaskType" }, "style": "simple" }, @@ -10658,13 +10660,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_jinaai:ServiceType" + "$ref": "#/components/schemas/inference._types:JinaAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAIServiceSettings" + "$ref": "#/components/schemas/inference._types:JinaAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskSettings" + "$ref": "#/components/schemas/inference._types:JinaAITaskSettings" } }, "required": [ @@ -10718,7 +10720,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_mistral:MistralTaskType" + "$ref": "#/components/schemas/inference._types:MistralTaskType" }, "style": "simple" }, @@ -10744,10 +10746,10 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_mistral:ServiceType" + "$ref": "#/components/schemas/inference._types:MistralServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_mistral:MistralServiceSettings" + "$ref": "#/components/schemas/inference._types:MistralServiceSettings" } }, "required": [ @@ -10795,7 +10797,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_openai:OpenAITaskType" + "$ref": "#/components/schemas/inference._types:OpenAITaskType" }, "style": "simple" }, @@ -10821,13 +10823,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_openai:ServiceType" + "$ref": "#/components/schemas/inference._types:OpenAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_openai:OpenAIServiceSettings" + "$ref": "#/components/schemas/inference._types:OpenAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_openai:OpenAITaskSettings" + "$ref": "#/components/schemas/inference._types:OpenAITaskSettings" } }, "required": [ @@ -10881,7 +10883,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskType" + "$ref": 
"#/components/schemas/inference._types:VoyageAITaskType" }, "style": "simple" }, @@ -10907,13 +10909,13 @@ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" }, "service": { - "$ref": "#/components/schemas/inference.put_voyageai:ServiceType" + "$ref": "#/components/schemas/inference._types:VoyageAIServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAIServiceSettings" + "$ref": "#/components/schemas/inference._types:VoyageAIServiceSettings" }, "task_settings": { - "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskSettings" + "$ref": "#/components/schemas/inference._types:VoyageAITaskSettings" } }, "required": [ @@ -10967,7 +10969,7 @@ "required": true, "deprecated": false, "schema": { - "$ref": "#/components/schemas/inference.put_watsonx:WatsonxTaskType" + "$ref": "#/components/schemas/inference._types:WatsonxTaskType" }, "style": "simple" }, @@ -10990,10 +10992,10 @@ "type": "object", "properties": { "service": { - "$ref": "#/components/schemas/inference.put_watsonx:ServiceType" + "$ref": "#/components/schemas/inference._types:WatsonxServiceType" }, "service_settings": { - "$ref": "#/components/schemas/inference.put_watsonx:WatsonxServiceSettings" + "$ref": "#/components/schemas/inference._types:WatsonxServiceSettings" } }, "required": [ @@ -49183,6 +49185,234 @@ "valid" ] }, + "inference._types:RequestChatCompletionBase": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RequestBase" + }, + { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + "type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference._types:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + ] + }, + "inference._types:Message": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/inference._types:MessageContent" + }, + "role": { + "description": "The role of the message author.", + "type": "string" + }, + "tool_call_id": { + "$ref": "#/components/schemas/_types:Id" + }, + "tool_calls": { + "description": "The tool calls generated by the model.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:ToolCall" + } + } + }, + "required": [ + "role" + ] + }, + "inference._types:MessageContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:ContentObject" + } + } + ] + }, + "inference._types:ContentObject": { + "type": "object", + "properties": { + "text": { + "description": "The text content.", 
+ "type": "string" + }, + "type": { + "description": "The type of content.", + "type": "string" + } + }, + "required": [ + "text", + "type" + ] + }, + "inference._types:ToolCall": { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/_types:Id" + }, + "function": { + "$ref": "#/components/schemas/inference._types:ToolCallFunction" + }, + "type": { + "description": "The type of the tool call.", + "type": "string" + } + }, + "required": [ + "id", + "function", + "type" + ] + }, + "inference._types:ToolCallFunction": { + "type": "object", + "properties": { + "arguments": { + "description": "The arguments to call the function with in JSON format.", + "type": "string" + }, + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "arguments", + "name" + ] + }, + "inference._types:CompletionToolType": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/inference._types:CompletionToolChoice" + } + ] + }, + "inference._types:CompletionToolChoice": { + "type": "object", + "properties": { + "type": { + "description": "The type of the tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference._types:CompletionToolChoiceFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference._types:CompletionToolChoiceFunction": { + "type": "object", + "properties": { + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "inference._types:CompletionTool": { + "type": "object", + "properties": { + "type": { + "description": "The type of tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference._types:CompletionToolFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference._types:CompletionToolFunction": { + "type": "object", + "properties": { + "description": { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "type": "string" + }, + "name": { + "description": "The name of the function.", + "type": "string" + }, + "parameters": { + "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", + "type": "object" + }, + "strict": { + "description": "Whether to enable schema adherence when generating the function call.", + "type": "boolean" + } + }, + "required": [ + "name" + ] + }, + "_types:RequestBase": { + "type": "object" + }, "_types:StreamResult": { "type": "object" }, @@ -49314,7 +49544,7 @@ "inference._types:ServiceSettings": { "type": "object" }, - "inference.put_alibabacloud:AlibabaCloudTaskType": { + "inference._types:AlibabaCloudTaskType": { "type": "string", "enum": [ "completion", @@ -49323,13 +49553,13 @@ "text_embedding" ] }, - "inference.put_alibabacloud:ServiceType": { + "inference._types:AlibabaCloudServiceType": { "type": "string", "enum": [ "alibabacloud-ai-search" ] }, - "inference.put_alibabacloud:AlibabaCloudServiceSettings": { + "inference._types:AlibabaCloudServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49371,7 +49601,7 @@ } } }, - "inference.put_alibabacloud:AlibabaCloudTaskSettings": { + "inference._types:AlibabaCloudTaskSettings": { "type": "object", "properties": { "input_type": { @@ -49384,20 +49614,20 @@ } } }, - "inference.put_amazonbedrock:AmazonBedrockTaskType": { + "inference._types:AmazonBedrockTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_amazonbedrock:ServiceType": { + "inference._types:AmazonBedrockServiceType": { "type": "string", "enum": [ "amazonbedrock" ] }, - "inference.put_amazonbedrock:AmazonBedrockServiceSettings": { + "inference._types:AmazonBedrockServiceSettings": { "type": "object", "properties": { "access_key": { @@ -49440,7 +49670,7 @@ "secret_key" ] }, - "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { + "inference._types:AmazonBedrockTaskSettings": { "type": "object", "properties": { "max_new_tokens": { @@ -49461,19 +49691,19 @@ } } }, - "inference.put_anthropic:AnthropicTaskType": { + "inference._types:AnthropicTaskType": { "type": "string", "enum": [ "completion" ] }, - "inference.put_anthropic:ServiceType": { + "inference._types:AnthropicServiceType": { "type": "string", "enum": [ "anthropic" ] }, - "inference.put_anthropic:AnthropicServiceSettings": { + "inference._types:AnthropicServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49493,7 +49723,7 @@ "model_id" ] }, - "inference.put_anthropic:AnthropicTaskSettings": { + "inference._types:AnthropicTaskSettings": { "type": "object", "properties": { "max_tokens": { @@ -49520,20 +49750,20 @@ "max_tokens" ] }, - "inference.put_azureaistudio:AzureAiStudioTaskType": { + "inference._types:AzureAiStudioTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_azureaistudio:ServiceType": { + "inference._types:AzureAiStudioServiceType": { "type": "string", "enum": [ "azureaistudio" ] }, - "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "inference._types:AzureAiStudioServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49569,7 +49799,7 @@ "provider" ] }, - "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "inference._types:AzureAiStudioTaskSettings": { "type": "object", "properties": { "do_sample": { @@ -49594,20 +49824,20 @@ } } }, - "inference.put_azureopenai:AzureOpenAITaskType": { + "inference._types:AzureOpenAITaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_azureopenai:ServiceType": { + "inference._types:AzureOpenAIServiceType": { "type": "string", "enum": [ "azureopenai" ] }, - 
"inference.put_azureopenai:AzureOpenAIServiceSettings": { + "inference._types:AzureOpenAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49652,7 +49882,7 @@ "resource_name" ] }, - "inference.put_azureopenai:AzureOpenAITaskSettings": { + "inference._types:AzureOpenAITaskSettings": { "type": "object", "properties": { "user": { @@ -49661,7 +49891,7 @@ } } }, - "inference.put_cohere:CohereTaskType": { + "inference._types:CohereTaskType": { "type": "string", "enum": [ "completion", @@ -49669,13 +49899,13 @@ "text_embedding" ] }, - "inference.put_cohere:ServiceType": { + "inference._types:CohereServiceType": { "type": "string", "enum": [ "cohere" ] }, - "inference.put_cohere:CohereServiceSettings": { + "inference._types:CohereServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49686,7 +49916,7 @@ "type": "string" }, "embedding_type": { - "$ref": "#/components/schemas/inference.put_cohere:EmbeddingType" + "$ref": "#/components/schemas/inference._types:CohereEmbeddingType" }, "model_id": { "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", @@ -49696,14 +49926,14 @@ "$ref": "#/components/schemas/inference._types:RateLimitSetting" }, "similarity": { - "$ref": "#/components/schemas/inference.put_cohere:SimilarityType" + "$ref": "#/components/schemas/inference._types:CohereSimilarityType" } }, "required": [ "api_key" ] }, - "inference.put_cohere:EmbeddingType": { + "inference._types:CohereEmbeddingType": { "type": "string", "enum": [ "byte", @@ -49711,7 +49941,7 @@ "int8" ] }, - "inference.put_cohere:SimilarityType": { + "inference._types:CohereSimilarityType": { "type": "string", "enum": [ "cosine", @@ -49719,11 +49949,11 @@ "l2_norm" ] }, - "inference.put_cohere:CohereTaskSettings": { + "inference._types:CohereTaskSettings": { "type": "object", "properties": { "input_type": { - "$ref": "#/components/schemas/inference.put_cohere:InputType" + "$ref": "#/components/schemas/inference._types:CohereInputType" }, "return_documents": { "description": "For a `rerank` task, return doc text within the results.", @@ -49734,11 +49964,11 @@ "type": "number" }, "truncate": { - "$ref": "#/components/schemas/inference.put_cohere:TruncateType" + "$ref": "#/components/schemas/inference._types:CohereTruncateType" } } }, - "inference.put_cohere:InputType": { + "inference._types:CohereInputType": { "type": "string", "enum": [ "classification", @@ -49747,7 +49977,7 @@ "search" ] }, - "inference.put_cohere:TruncateType": { + "inference._types:CohereTruncateType": { "type": "string", "enum": [ "END", @@ -49755,19 +49985,19 @@ "START" ] }, - "inference.put_eis:EisTaskType": { + "inference._types:EisTaskType": { "type": "string", "enum": [ "chat_completion" ] }, - "inference.put_eis:ServiceType": { + "inference._types:EisServiceType": { "type": "string", "enum": [ "elastic" ] }, - "inference.put_eis:EisServiceSettings": { + "inference._types:EisServiceSettings": { "type": "object", "properties": { "model_id": { @@ -49782,7 +50012,7 @@ "model_id" ] }, - 
"inference.put_elasticsearch:ElasticsearchTaskType": { + "inference._types:ElasticsearchTaskType": { "type": "string", "enum": [ "rerank", @@ -49790,13 +50020,13 @@ "text_embedding" ] }, - "inference.put_elasticsearch:ServiceType": { + "inference._types:ElasticsearchServiceType": { "type": "string", "enum": [ "elasticsearch" ] }, - "inference.put_elasticsearch:ElasticsearchServiceSettings": { + "inference._types:ElasticsearchServiceSettings": { "type": "object", "properties": { "adaptive_allocations": { @@ -49844,7 +50074,7 @@ } } }, - "inference.put_elasticsearch:ElasticsearchTaskSettings": { + "inference._types:ElasticsearchTaskSettings": { "type": "object", "properties": { "return_documents": { @@ -49853,19 +50083,19 @@ } } }, - "inference.put_elser:ElserTaskType": { + "inference._types:ElserTaskType": { "type": "string", "enum": [ "sparse_embedding" ] }, - "inference.put_elser:ServiceType": { + "inference._types:ElserServiceType": { "type": "string", "enum": [ "elser" ] }, - "inference.put_elser:ElserServiceSettings": { + "inference._types:ElserServiceSettings": { "type": "object", "properties": { "adaptive_allocations": { @@ -49885,20 +50115,20 @@ "num_threads" ] }, - "inference.put_googleaistudio:GoogleAiStudioTaskType": { + "inference._types:GoogleAiStudioTaskType": { "type": "string", "enum": [ "completion", "text_embedding" ] }, - "inference.put_googleaistudio:ServiceType": { + "inference._types:GoogleAiServiceType": { "type": "string", "enum": [ "googleaistudio" ] }, - "inference.put_googleaistudio:GoogleAiStudioServiceSettings": { + "inference._types:GoogleAiStudioServiceSettings": { "type": "object", "properties": { "api_key": { @@ -49921,20 +50151,20 @@ "model_id" ] }, - "inference.put_googlevertexai:GoogleVertexAITaskType": { + "inference._types:GoogleVertexAITaskType": { "type": "string", "enum": [ "rerank", "text_embedding" ] }, - "inference.put_googlevertexai:ServiceType": { + "inference._types:GoogleVertexAIServiceType": { "type": "string", "enum": [ "googlevertexai" ] }, - "inference.put_googlevertexai:GoogleVertexAIServiceSettings": { + "inference._types:GoogleVertexAIServiceSettings": { "type": "object", "properties": { "location": { @@ -49970,7 +50200,7 @@ "service_account_json" ] }, - "inference.put_googlevertexai:GoogleVertexAITaskSettings": { + "inference._types:GoogleVertexAITaskSettings": { "type": "object", "properties": { "auto_truncate": { @@ -49983,19 +50213,19 @@ } } }, - "inference.put_hugging_face:HuggingFaceTaskType": { + "inference._types:HuggingFaceTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_hugging_face:ServiceType": { + "inference._types:HuggingFaceServiceType": { "type": "string", "enum": [ "hugging_face" ] }, - "inference.put_hugging_face:HuggingFaceServiceSettings": { + "inference._types:HuggingFaceServiceSettings": { "type": "object", "properties": { "api_key": { @@ -50018,20 +50248,20 @@ "url" ] }, - "inference.put_jinaai:JinaAITaskType": { + "inference._types:JinaAITaskType": { "type": "string", "enum": [ "rerank", "text_embedding" ] }, - "inference.put_jinaai:ServiceType": { + "inference._types:JinaAIServiceType": { "type": "string", "enum": [ "jinaai" ] }, - "inference.put_jinaai:JinaAIServiceSettings": { + "inference._types:JinaAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -50049,14 +50279,14 @@ "$ref": "#/components/schemas/inference._types:RateLimitSetting" }, "similarity": { - "$ref": "#/components/schemas/inference.put_jinaai:SimilarityType" + "$ref": 
"#/components/schemas/inference._types:JinaAISimilarityType" } }, "required": [ "api_key" ] }, - "inference.put_jinaai:SimilarityType": { + "inference._types:JinaAISimilarityType": { "type": "string", "enum": [ "cosine", @@ -50064,7 +50294,7 @@ "l2_norm" ] }, - "inference.put_jinaai:JinaAITaskSettings": { + "inference._types:JinaAITaskSettings": { "type": "object", "properties": { "return_documents": { @@ -50072,7 +50302,7 @@ "type": "boolean" }, "task": { - "$ref": "#/components/schemas/inference.put_jinaai:TextEmbeddingTask" + "$ref": "#/components/schemas/inference._types:JinaAITextEmbeddingTask" }, "top_n": { "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", @@ -50080,7 +50310,7 @@ } } }, - "inference.put_jinaai:TextEmbeddingTask": { + "inference._types:JinaAITextEmbeddingTask": { "type": "string", "enum": [ "classification", @@ -50089,19 +50319,19 @@ "search" ] }, - "inference.put_mistral:MistralTaskType": { + "inference._types:MistralTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_mistral:ServiceType": { + "inference._types:MistralServiceType": { "type": "string", "enum": [ "mistral" ] }, - "inference.put_mistral:MistralServiceSettings": { + "inference._types:MistralServiceSettings": { "type": "object", "properties": { "api_key": { @@ -50131,7 +50361,7 @@ "model" ] }, - "inference.put_openai:OpenAITaskType": { + "inference._types:OpenAITaskType": { "type": "string", "enum": [ "chat_completion", @@ -50139,13 +50369,13 @@ "text_embedding" ] }, - "inference.put_openai:ServiceType": { + "inference._types:OpenAIServiceType": { "type": "string", "enum": [ "openai" ] }, - "inference.put_openai:OpenAIServiceSettings": { + "inference._types:OpenAIServiceSettings": { "type": "object", "properties": { "api_key": { @@ -50183,7 +50413,7 @@ "model_id" ] }, - "inference.put_openai:OpenAITaskSettings": { + "inference._types:OpenAITaskSettings": { "type": "object", "properties": { "user": { @@ -50192,20 +50422,20 @@ } } }, - "inference.put_voyageai:VoyageAITaskType": { + "inference._types:VoyageAITaskType": { "type": "string", "enum": [ "text_embedding", "rerank" ] }, - "inference.put_voyageai:ServiceType": { + "inference._types:VoyageAIServiceType": { "type": "string", "enum": [ "voyageai" ] }, - "inference.put_voyageai:VoyageAIServiceSettings": { + "inference._types:VoyageAIServiceSettings": { "type": "object", "properties": { "dimensions": { @@ -50237,7 +50467,7 @@ "model_id" ] }, - "inference.put_voyageai:VoyageAITaskSettings": { + "inference._types:VoyageAITaskSettings": { "type": "object", "properties": { "input_type": { @@ -50258,19 +50488,19 @@ } } }, - "inference.put_watsonx:WatsonxTaskType": { + "inference._types:WatsonxTaskType": { "type": "string", "enum": [ "text_embedding" ] }, - "inference.put_watsonx:ServiceType": { + "inference._types:WatsonxServiceType": { "type": "string", "enum": [ "watsonxai" ] }, - "inference.put_watsonx:WatsonxServiceSettings": { + "inference._types:WatsonxServiceSettings": { "type": "object", "properties": { "api_key": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 017a12d569..412c2c100d 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4343,7 +4343,7 @@ "name": "Request", 
"namespace": "inference.chat_completion_unified" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], @@ -4520,7 +4520,7 @@ "name": "Request", "namespace": "inference.post_eis_chat_completion" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], @@ -27167,14 +27167,21 @@ "CommonQueryParameters" ], "body": { - "kind": "properties", - "properties": [] + "codegenName": "chat_completion_request", + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + } }, "description": "Perform chat completion inference", "inherits": { "type": { - "name": "RequestChatCompletionBase", - "namespace": "inference._types" + "name": "RequestBase", + "namespace": "_types" } }, "kind": "request", @@ -27211,7 +27218,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L24-L53" }, { "body": { @@ -27530,14 +27537,21 @@ "CommonQueryParameters" ], "body": { - "kind": "properties", - "properties": [] + "codegenName": "chat_completion_request", + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + } }, "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", "inherits": { "type": { - "name": "RequestChatCompletionBase", - "namespace": "inference._types" + "name": "RequestBase", + "namespace": "_types" } }, "kind": "request", @@ -27560,7 +27574,7 @@ } ], "query": [], - "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L48" }, { "body": { @@ -27684,8 +27698,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_alibabacloud" + "name": "AlibabaCloudServiceType", + "namespace": "inference._types" } } }, @@ -27697,7 +27711,7 @@ "kind": "instance_of", "type": { "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" } } }, @@ -27709,7 +27723,7 @@ "kind": "instance_of", "type": { "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" } } } @@ -27758,7 +27772,7 @@ "kind": "instance_of", "type": { "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" } } }, @@ -27776,7 +27790,7 @@ } ], "query": [], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L30-L83" }, { "body": { @@ -27825,8 +27839,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_amazonbedrock" + "name": "AmazonBedrockServiceType", + "namespace": "inference._types" } } }, @@ -27838,7 +27852,7 @@ "kind": "instance_of", "type": { "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } }, @@ -27850,7 +27864,7 @@ "kind": "instance_of", "type": { "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } } @@ -27889,7 +27903,7 @@ "kind": 
"instance_of", "type": { "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } }, @@ -27907,7 +27921,7 @@ } ], "query": [], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L30-L86" }, { "body": { @@ -27956,8 +27970,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_anthropic" + "name": "AnthropicServiceType", + "namespace": "inference._types" } } }, @@ -27969,7 +27983,7 @@ "kind": "instance_of", "type": { "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } }, @@ -27981,7 +27995,7 @@ "kind": "instance_of", "type": { "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } } @@ -28014,7 +28028,7 @@ "kind": "instance_of", "type": { "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } }, @@ -28032,7 +28046,7 @@ } ], "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L30-L84" }, { "body": { @@ -28081,8 +28095,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_azureaistudio" + "name": "AzureAiStudioServiceType", + "namespace": "inference._types" } } }, @@ -28094,7 +28108,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" } } }, @@ -28106,7 +28120,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" } } } @@ -28145,7 +28159,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" } } }, @@ -28163,7 +28177,7 @@ } ], "query": [], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L30-L83" }, { "body": { @@ -28212,8 +28226,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" + "name": "AzureOpenAIServiceType", + "namespace": "inference._types" } } }, @@ -28225,7 +28239,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } }, @@ -28237,7 +28251,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } } @@ -28276,7 +28290,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } }, @@ -28294,7 +28308,7 @@ } ], "query": [], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L30-L91" }, { "body": { @@ -28343,8 +28357,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_cohere" + "name": "CohereServiceType", + "namespace": "inference._types" } } }, @@ -28356,7 +28370,7 @@ "kind": "instance_of", "type": { "name": "CohereServiceSettings", - "namespace": 
"inference.put_cohere" + "namespace": "inference._types" } } }, @@ -28368,7 +28382,7 @@ "kind": "instance_of", "type": { "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" + "namespace": "inference._types" } } } @@ -28407,7 +28421,7 @@ "kind": "instance_of", "type": { "name": "CohereTaskType", - "namespace": "inference.put_cohere" + "namespace": "inference._types" } } }, @@ -28425,7 +28439,7 @@ } ], "query": [], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L30-L84" }, { "body": { @@ -28460,8 +28474,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_eis" + "name": "EisServiceType", + "namespace": "inference._types" } } }, @@ -28473,7 +28487,7 @@ "kind": "instance_of", "type": { "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "namespace": "inference._types" } } } @@ -28500,7 +28514,7 @@ "kind": "instance_of", "type": { "name": "EisTaskType", - "namespace": "inference.put_eis" + "namespace": "inference._types" } } }, @@ -28518,7 +28532,7 @@ } ], "query": [], - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + "specLocation": "inference/put_eis/PutEisRequest.ts#L28-L66" }, { "body": { @@ -28567,8 +28581,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" + "name": "ElasticsearchServiceType", + "namespace": "inference._types" } } }, @@ -28580,7 +28594,7 @@ "kind": "instance_of", "type": { "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" } } }, @@ -28592,7 +28606,7 @@ "kind": "instance_of", "type": { "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" } } } @@ -28651,7 +28665,7 @@ "kind": "instance_of", "type": { "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" } } }, @@ -28669,7 +28683,7 @@ } ], "query": [], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L26-L87" + "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L30-L91" }, { "body": { @@ -28724,8 +28738,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_elser" + "name": "ElserServiceType", + "namespace": "inference._types" } } }, @@ -28737,7 +28751,7 @@ "kind": "instance_of", "type": { "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "namespace": "inference._types" } } } @@ -28780,7 +28794,7 @@ "kind": "instance_of", "type": { "name": "ElserTaskType", - "namespace": "inference.put_elser" + "namespace": "inference._types" } } }, @@ -28798,7 +28812,7 @@ } ], "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L26-L83" + "specLocation": "inference/put_elser/PutElserRequest.ts#L29-L86" }, { "body": { @@ -28853,8 +28867,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" + "name": "GoogleAiServiceType", + "namespace": "inference._types" } } }, @@ -28866,7 +28880,7 @@ "kind": "instance_of", "type": { "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "namespace": "inference._types" } } } @@ -28900,7 +28914,7 @@ "kind": "instance_of", "type": { "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" + "namespace": "inference._types" } } }, @@ 
-28918,7 +28932,7 @@ } ], "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L29-L77" }, { "body": { @@ -28967,8 +28981,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" + "name": "GoogleVertexAIServiceType", + "namespace": "inference._types" } } }, @@ -28980,7 +28994,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } }, @@ -28992,7 +29006,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } } @@ -29031,7 +29045,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } }, @@ -29049,7 +29063,7 @@ } ], "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L30-L83" }, { "body": { @@ -29098,8 +29112,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" + "name": "HuggingFaceServiceType", + "namespace": "inference._types" } } }, @@ -29111,7 +29125,7 @@ "kind": "instance_of", "type": { "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" + "namespace": "inference._types" } } } @@ -29145,7 +29159,7 @@ "kind": "instance_of", "type": { "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" + "namespace": "inference._types" } } }, @@ -29163,7 +29177,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L91" }, { "body": { @@ -29212,8 +29226,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" + "name": "JinaAIServiceType", + "namespace": "inference._types" } } }, @@ -29225,7 +29239,7 @@ "kind": "instance_of", "type": { "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } }, @@ -29237,7 +29251,7 @@ "kind": "instance_of", "type": { "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } } @@ -29276,7 +29290,7 @@ "kind": "instance_of", "type": { "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } }, @@ -29294,7 +29308,7 @@ } ], "query": [], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L30-L86" }, { "body": { @@ -29343,8 +29357,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_mistral" + "name": "MistralServiceType", + "namespace": "inference._types" } } }, @@ -29356,7 +29370,7 @@ "kind": "instance_of", "type": { "name": "MistralServiceSettings", - "namespace": "inference.put_mistral" + "namespace": "inference._types" } } } @@ -29389,7 +29403,7 @@ "kind": "instance_of", "type": { "name": "MistralTaskType", - "namespace": "inference.put_mistral" + "namespace": "inference._types" } } }, @@ -29407,7 +29421,7 @@ } ], "query": [], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" + 
"specLocation": "inference/put_mistral/PutMistralRequest.ts#L29-L78" }, { "body": { @@ -29456,8 +29470,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_openai" + "name": "OpenAIServiceType", + "namespace": "inference._types" } } }, @@ -29469,7 +29483,7 @@ "kind": "instance_of", "type": { "name": "OpenAIServiceSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } }, @@ -29481,7 +29495,7 @@ "kind": "instance_of", "type": { "name": "OpenAITaskSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } } @@ -29520,7 +29534,7 @@ "kind": "instance_of", "type": { "name": "OpenAITaskType", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } }, @@ -29538,7 +29552,7 @@ } ], "query": [], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L30-L84" }, { "body": { @@ -29587,8 +29601,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_voyageai" + "name": "VoyageAIServiceType", + "namespace": "inference._types" } } }, @@ -29600,7 +29614,7 @@ "kind": "instance_of", "type": { "name": "VoyageAIServiceSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } }, @@ -29612,7 +29626,7 @@ "kind": "instance_of", "type": { "name": "VoyageAITaskSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } } @@ -29651,7 +29665,7 @@ "kind": "instance_of", "type": { "name": "VoyageAITaskType", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } }, @@ -29669,7 +29683,7 @@ } ], "query": [], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L28-L77" + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L30-L79" }, { "body": { @@ -29704,8 +29718,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_watsonx" + "name": "WatsonxServiceType", + "namespace": "inference._types" } } }, @@ -29717,7 +29731,7 @@ "kind": "instance_of", "type": { "name": "WatsonxServiceSettings", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" } } } @@ -29750,7 +29764,7 @@ "kind": "instance_of", "type": { "name": "WatsonxTaskType", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" } } }, @@ -29768,7 +29782,7 @@ } ], "query": [], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L28-L74" }, { "body": { @@ -102694,116 +102708,263 @@ "specLocation": "indices/stats/types.ts#L169-L174" }, { - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "alibabacloud-ai-search" + } + ], "name": { - "name": "DenseByteVector", + "name": "AlibabaCloudServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L47-L51", - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "byte", - "namespace": "_types" - } + "specLocation": "inference/_types/CommonTypes.ts#L290-L292" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "space_embedding" + }, + { + "name": "text_embedding" } - } + ], + "name": { + "name": "AlibabaCloudTaskType", + "namespace": "inference._types" + }, + 
"specLocation": "inference/_types/CommonTypes.ts#L283-L288" }, { - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], "name": { - "name": "DenseVector", + "name": "AmazonBedrockServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L30-L34", - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } + "specLocation": "inference/_types/CommonTypes.ts#L369-L371" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" } - } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L364-L367" }, { - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "anthropic" + } + ], "name": { - "name": "ServiceSettings", + "name": "AnthropicServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L91-L91", - "type": { - "kind": "user_defined_value" - } + "specLocation": "inference/_types/CommonTypes.ts#L422-L424" }, { - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "completion" + } + ], "name": { - "name": "SparseVector", + "name": "AnthropicTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L24-L28", - "type": { - "key": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } + "specLocation": "inference/_types/CommonTypes.ts#L418-L420" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "AzureAiStudioServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L505-L507" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" }, - "kind": "dictionary_of", - "singleKey": false, - "value": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } + { + "name": "text_embedding" } - } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L500-L503" }, { - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], "name": { - "name": "TaskSettings", + "name": "AzureOpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L93-L93", - "type": { - "kind": "user_defined_value" - } + "specLocation": "inference/_types/CommonTypes.ts#L569-L571" }, { "kind": "enum", "members": [ { - "name": "sparse_embedding" + "name": "completion" }, { "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L564-L567" + }, + { + "kind": "enum", + "members": [ + { + "name": "byte" }, { - "name": "rerank" + "name": "float" }, + { + "name": "int8" + } + ], + "name": { + "name": "CohereEmbeddingType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L626-L630" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "CohereInputType", + 
"namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L632-L637" + }, + { + "kind": "enum", + "members": [ + { + "name": "cohere" + } + ], + "name": { + "name": "CohereServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L622-L624" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "CohereSimilarityType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L639-L643" + }, + { + "kind": "enum", + "members": [ { "name": "completion" }, { - "name": "chat_completion" + "name": "rerank" + }, + { + "name": "text_embedding" } ], "name": { - "name": "TaskType", + "name": "CohereTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L20-L29" + "specLocation": "inference/_types/CommonTypes.ts#L616-L620" + }, + { + "kind": "enum", + "members": [ + { + "name": "END" + }, + { + "name": "NONE" + }, + { + "name": "START" + } + ], + "name": { + "name": "CohereTruncateType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L645-L649" }, { "codegenNames": [ @@ -102813,9 +102974,9 @@ "kind": "type_alias", "name": { "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", + "specLocation": "inference/_types/CommonTypes.ts#L80-L83", "type": { "items": [ { @@ -102829,7 +102990,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoice", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } ], @@ -102841,7 +103002,7 @@ "kind": "interface", "name": { "name": "CompletionToolChoice", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -102864,19 +103025,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" + "specLocation": "inference/_types/CommonTypes.ts#L169-L181" }, { "description": "The tool choice function.", "kind": "interface", "name": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -102892,195 +103053,142 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" + "specLocation": "inference/_types/CommonTypes.ts#L158-L167" }, { - "codegenNames": [ - "string", - "object" - ], + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", "kind": "type_alias", "name": { - "name": "MessageContent", - "namespace": "inference.chat_completion_unified" + "name": "DenseByteVector", + "namespace": "inference._types" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", + "specLocation": "inference/_types/Results.ts#L47-L51", "type": { - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "ContentObject", - "namespace": "inference.chat_completion_unified" - } - } + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": 
{ + "name": "byte", + "namespace": "_types" } - ], - "kind": "union_of" + } } }, { - "description": "An object style representation of a single portion of a conversation.", - "kind": "interface", + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "kind": "type_alias", "name": { - "name": "ContentObject", - "namespace": "inference.chat_completion_unified" + "name": "DenseVector", + "namespace": "inference._types" }, - "properties": [ - { - "description": "The text content.", - "name": "text", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The type of content.", - "name": "type", - "required": true, + "specLocation": "inference/_types/Results.ts#L30-L34", + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } + "name": "float", + "namespace": "_types" } } - ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" + } }, { "kind": "enum", "members": [ { - "name": "completion" - }, - { - "name": "rerank" - }, - { - "name": "space_embedding" - }, - { - "name": "text_embedding" + "name": "elastic" } ], "name": { - "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" + "name": "EisServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" + "specLocation": "inference/_types/CommonTypes.ts#L701-L703" }, { "kind": "enum", "members": [ { - "name": "alibabacloud-ai-search" + "name": "chat_completion" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_alibabacloud" + "name": "EisTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" + "specLocation": "inference/_types/CommonTypes.ts#L697-L699" }, { "kind": "enum", "members": [ { - "name": "completion" - }, - { - "name": "text_embedding" + "name": "elasticsearch" } ], "name": { - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "name": "ElasticsearchServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + "specLocation": "inference/_types/CommonTypes.ts#L755-L757" }, { "kind": "enum", "members": [ { - "name": "amazonbedrock" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_amazonbedrock" - }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" - }, - { - "kind": "enum", - "members": [ + "name": "rerank" + }, { - "name": "completion" + "name": "sparse_embedding" + }, + { + "name": "text_embedding" } ], "name": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "name": "ElasticsearchTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" + "specLocation": "inference/_types/CommonTypes.ts#L749-L753" }, { "kind": "enum", "members": [ { - "name": "anthropic" + "name": "elser" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_anthropic" + "name": "ElserServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" + "specLocation": "inference/_types/CommonTypes.ts#L791-L793" }, { "kind": "enum", "members": [ { - "name": "completion" - }, - { - "name": 
"text_embedding" + "name": "sparse_embedding" } ], "name": { - "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" + "name": "ElserTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + "specLocation": "inference/_types/CommonTypes.ts#L787-L789" }, { "kind": "enum", "members": [ { - "name": "azureaistudio" + "name": "googleaistudio" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_azureaistudio" + "name": "GoogleAiServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + "specLocation": "inference/_types/CommonTypes.ts#L818-L820" }, { "kind": "enum", @@ -103093,30 +103201,27 @@ } ], "name": { - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" + "name": "GoogleAiStudioTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + "specLocation": "inference/_types/CommonTypes.ts#L813-L816" }, { "kind": "enum", "members": [ { - "name": "azureopenai" + "name": "googlevertexai" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" + "name": "GoogleVertexAIServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + "specLocation": "inference/_types/CommonTypes.ts#L866-L868" }, { "kind": "enum", "members": [ - { - "name": "completion" - }, { "name": "rerank" }, @@ -103125,64 +103230,49 @@ } ], "name": { - "name": "CohereTaskType", - "namespace": "inference.put_cohere" + "name": "GoogleVertexAITaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88" + "specLocation": "inference/_types/CommonTypes.ts#L861-L864" }, { "kind": "enum", "members": [ { - "name": "byte" - }, - { - "name": "float" - }, - { - "name": "int8" + "name": "hugging_face" } ], "name": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" + "name": "HuggingFaceServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" + "specLocation": "inference/_types/CommonTypes.ts#L897-L899" }, { "kind": "enum", "members": [ { - "name": "classification" - }, - { - "name": "clustering" - }, - { - "name": "ingest" - }, - { - "name": "search" + "name": "text_embedding" } ], "name": { - "name": "InputType", - "namespace": "inference.put_cohere" + "name": "HuggingFaceTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" + "specLocation": "inference/_types/CommonTypes.ts#L893-L895" }, { "kind": "enum", "members": [ { - "name": "cohere" + "name": "jinaai" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_cohere" + "name": "JinaAIServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" + "specLocation": "inference/_types/CommonTypes.ts#L960-L962" }, { "kind": "enum", @@ -103198,325 +103288,249 @@ } ], "name": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" + "name": "JinaAISimilarityType", + "namespace": "inference._types" }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" + "specLocation": "inference/_types/CommonTypes.ts#L964-L968" }, { "kind": "enum", "members": [ { - "name": "END" - }, - { - "name": "NONE" + "name": "rerank" }, { - "name": 
"START" - } - ], - "name": { - "name": "TruncateType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" - }, - { - "kind": "enum", - "members": [ - { - "name": "chat_completion" - } - ], - "name": { - "name": "EisTaskType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" - }, - { - "kind": "enum", - "members": [ - { - "name": "elastic" + "name": "text_embedding" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_eis" + "name": "JinaAITaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" + "specLocation": "inference/_types/CommonTypes.ts#L955-L958" }, { "kind": "enum", "members": [ { - "name": "rerank" + "name": "classification" }, { - "name": "sparse_embedding" + "name": "clustering" }, { - "name": "text_embedding" - } - ], - "name": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L89-L93" - }, - { - "kind": "enum", - "members": [ + "name": "ingest" + }, { - "name": "elasticsearch" + "name": "search" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" + "name": "JinaAITextEmbeddingTask", + "namespace": "inference._types" }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L95-L97" + "specLocation": "inference/_types/CommonTypes.ts#L970-L975" }, { - "kind": "enum", - "members": [ - { - "name": "sparse_embedding" - } + "codegenNames": [ + "string", + "object" ], + "kind": "type_alias", "name": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" + "name": "MessageContent", + "namespace": "inference._types" }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L85-L87" + "specLocation": "inference/_types/CommonTypes.ts#L131-L134", + "type": { + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ContentObject", + "namespace": "inference._types" + } + } + } + ], + "kind": "union_of" + } }, { - "kind": "enum", - "members": [ - { - "name": "elser" - } - ], + "description": "An object style representation of a single portion of a conversation.", + "kind": "interface", "name": { - "name": "ServiceType", - "namespace": "inference.put_elser" + "name": "ContentObject", + "namespace": "inference._types" }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L89-L91" - }, - { - "kind": "enum", - "members": [ + "properties": [ { - "name": "completion" + "description": "The text content.", + "name": "text", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } }, { - "name": "text_embedding" + "description": "The type of content.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "name": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" + "specLocation": "inference/_types/CommonTypes.ts#L85-L97" }, { "kind": "enum", "members": [ { - "name": "googleaistudio" + "name": "mistral" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" + "name": 
"MistralServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" + "specLocation": "inference/_types/CommonTypes.ts#L1010-L1012" }, { "kind": "enum", "members": [ - { - "name": "rerank" - }, { "name": "text_embedding" } ], "name": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" - }, - { - "kind": "enum", - "members": [ - { - "name": "googlevertexai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" + "name": "MistralTaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" + "specLocation": "inference/_types/CommonTypes.ts#L1006-L1008" }, { "kind": "enum", "members": [ { - "name": "text_embedding" + "name": "openai" } ], "name": { - "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" + "name": "OpenAIServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" + "specLocation": "inference/_types/CommonTypes.ts#L1072-L1074" }, { "kind": "enum", "members": [ { - "name": "hugging_face" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" - }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" - }, - { - "kind": "enum", - "members": [ + "name": "chat_completion" + }, { - "name": "rerank" + "name": "completion" }, { "name": "text_embedding" } ], "name": { - "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" + "name": "OpenAITaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L86-L89" + "specLocation": "inference/_types/CommonTypes.ts#L1066-L1070" }, { - "kind": "enum", - "members": [ - { - "name": "jinaai" - } - ], + "kind": "type_alias", "name": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" + "name": "ServiceSettings", + "namespace": "inference._types" }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L91-L93" + "specLocation": "inference/_types/Services.ts#L91-L91", + "type": { + "kind": "user_defined_value" + } }, { - "kind": "enum", - "members": [ - { - "name": "cosine" - }, - { - "name": "dot_product" - }, - { - "name": "l2_norm" - } - ], + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "kind": "type_alias", "name": { - "name": "SimilarityType", - "namespace": "inference.put_jinaai" + "name": "SparseVector", + "namespace": "inference._types" }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L95-L99" - }, - { - "kind": "enum", - "members": [ - { - "name": "classification" - }, - { - "name": "clustering" - }, - { - "name": "ingest" + "specLocation": "inference/_types/Results.ts#L24-L28", + "type": { + "key": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } }, - { - "name": "search" + "kind": "dictionary_of", + "singleKey": false, + "value": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } } - ], - "name": { - "name": "TextEmbeddingTask", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L101-L106" + } }, { - "kind": "enum", - "members": [ - { - "name": "text_embedding" - } - ], + "kind": "type_alias", "name": { - "name": 
"MistralTaskType", - "namespace": "inference.put_mistral" + "name": "TaskSettings", + "namespace": "inference._types" }, - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L79-L81" + "specLocation": "inference/_types/Services.ts#L93-L93", + "type": { + "kind": "user_defined_value" + } }, { "kind": "enum", "members": [ { - "name": "mistral" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_mistral" - }, - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" - }, - { - "kind": "enum", - "members": [ + "name": "sparse_embedding" + }, { - "name": "chat_completion" + "name": "text_embedding" }, { - "name": "completion" + "name": "rerank" }, { - "name": "text_embedding" - } - ], - "name": { - "name": "OpenAITaskType", - "namespace": "inference.put_openai" - }, - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L84-L88" - }, - { - "kind": "enum", - "members": [ + "name": "completion" + }, { - "name": "openai" + "name": "chat_completion" } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_openai" + "name": "TaskType", + "namespace": "inference._types" }, - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L90-L92" + "specLocation": "inference/_types/TaskType.ts#L20-L29" }, { "kind": "enum", @@ -103526,10 +103540,10 @@ } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_voyageai" + "name": "VoyageAIServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L84-L86" + "specLocation": "inference/_types/CommonTypes.ts#L1140-L1142" }, { "kind": "enum", @@ -103543,9 +103557,9 @@ ], "name": { "name": "VoyageAITaskType", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" }, - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L79-L82" + "specLocation": "inference/_types/CommonTypes.ts#L1135-L1138" }, { "kind": "enum", @@ -103555,10 +103569,10 @@ } ], "name": { - "name": "ServiceType", - "namespace": "inference.put_watsonx" + "name": "WatsonxServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L76-L78" + "specLocation": "inference/_types/CommonTypes.ts#L1187-L1189" }, { "kind": "enum", @@ -103569,9 +103583,9 @@ ], "name": { "name": "WatsonxTaskType", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" }, - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L72-L74" + "specLocation": "inference/_types/CommonTypes.ts#L1183-L1185" }, { "kind": "enum", @@ -123436,7 +123450,7 @@ "kind": "instance_of", "type": { "name": "Message", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } } @@ -123500,7 +123514,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } }, @@ -123514,7 +123528,7 @@ "kind": "instance_of", "type": { "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } } @@ -123532,14 +123546,14 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L28-L61" + "specLocation": "inference/_types/CommonTypes.ts#L26-L59" }, { "description": "An object representing part of the conversation.", "kind": "interface", "name": { "name": "Message", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -123550,7 +123564,7 @@ "kind": "instance_of", "type": { "name": "MessageContent", - 
"namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } }, @@ -123588,20 +123602,20 @@ "kind": "instance_of", "type": { "name": "ToolCall", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" + "specLocation": "inference/_types/CommonTypes.ts#L136-L156" }, { "description": "A tool call generated by the model.", "kind": "interface", "name": { "name": "ToolCall", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -123624,7 +123638,7 @@ "kind": "instance_of", "type": { "name": "ToolCallFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } }, @@ -123641,14 +123655,14 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" + "specLocation": "inference/_types/CommonTypes.ts#L113-L129" }, { "description": "The function that the model called.", "kind": "interface", "name": { "name": "ToolCallFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -123676,14 +123690,14 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" + "specLocation": "inference/_types/CommonTypes.ts#L99-L111" }, { "description": "A list of tools that the model can call.", "kind": "interface", "name": { "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -123706,19 +123720,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" + "specLocation": "inference/_types/CommonTypes.ts#L206-L218" }, { "description": "The completion tool function definition.", "kind": "interface", "name": { "name": "CompletionToolFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -123766,7 +123780,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" + "specLocation": "inference/_types/CommonTypes.ts#L183-L204" }, { "description": "Defines the completion result.", @@ -124013,7 +124027,7 @@ "kind": "interface", "name": { "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" }, "properties": [ { @@ -124079,7 +124093,7 @@ } } ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" + "specLocation": "inference/_types/CommonTypes.ts#L220-L265" }, { "kind": "interface", @@ -124107,7 +124121,7 @@ "kind": "interface", "name": { "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" }, "properties": [ { @@ -124135,13 +124149,13 @@ } } ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" + "specLocation": "inference/_types/CommonTypes.ts#L267-L281" }, { "kind": "interface", "name": { "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" }, "properties": [ { @@ -124223,13 +124237,13 @@ } } ], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" + "specLocation": "inference/_types/CommonTypes.ts#L294-L336" }, { "kind": 
"interface", "name": { "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" }, "properties": [ { @@ -124282,13 +124296,13 @@ } } ], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + "specLocation": "inference/_types/CommonTypes.ts#L338-L362" }, { "kind": "interface", "name": { "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" }, "properties": [ { @@ -124329,13 +124343,13 @@ } } ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + "specLocation": "inference/_types/CommonTypes.ts#L373-L389" }, { "kind": "interface", "name": { "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" }, "properties": [ { @@ -124389,13 +124403,13 @@ } } ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" + "specLocation": "inference/_types/CommonTypes.ts#L391-L416" }, { "kind": "interface", "name": { "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" }, "properties": [ { @@ -124463,13 +124477,13 @@ } } ], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + "specLocation": "inference/_types/CommonTypes.ts#L426-L468" }, { "kind": "interface", "name": { "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" }, "properties": [ { @@ -124534,13 +124548,13 @@ } } ], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + "specLocation": "inference/_types/CommonTypes.ts#L470-L498" }, { "kind": "interface", "name": { "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" }, "properties": [ { @@ -124626,13 +124640,13 @@ } } ], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" + "specLocation": "inference/_types/CommonTypes.ts#L509-L554" }, { "kind": "interface", "name": { "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" }, "properties": [ { @@ -124648,13 +124662,13 @@ } } ], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" + "specLocation": "inference/_types/CommonTypes.ts#L556-L562" }, { "kind": "interface", "name": { "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" + "namespace": "inference._types" }, "properties": [ { @@ -124679,8 +124693,8 @@ "type": { "kind": "instance_of", "type": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" + "name": "CohereEmbeddingType", + "namespace": "inference._types" } } }, @@ -124715,19 +124729,19 @@ "type": { "kind": "instance_of", "type": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" + "name": "CohereSimilarityType", + "namespace": "inference._types" } } } ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" + "specLocation": "inference/_types/CommonTypes.ts#L573-L614" }, { "kind": "interface", "name": { "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" + "namespace": "inference._types" }, "properties": [ { @@ -124737,8 +124751,8 @@ "type": { "kind": "instance_of", "type": { - "name": "InputType", - "namespace": "inference.put_cohere" + "name": "CohereInputType", + "namespace": "inference._types" } } }, @@ -124773,19 +124787,19 @@ "type": { "kind": 
"instance_of", "type": { - "name": "TruncateType", - "namespace": "inference.put_cohere" + "name": "CohereTruncateType", + "namespace": "inference._types" } } } ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" + "specLocation": "inference/_types/CommonTypes.ts#L651-L683" }, { "kind": "interface", "name": { "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "namespace": "inference._types" }, "properties": [ { @@ -124813,13 +124827,13 @@ } } ], - "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + "specLocation": "inference/_types/CommonTypes.ts#L685-L695" }, { "kind": "interface", "name": { "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" }, "properties": [ { @@ -124885,7 +124899,7 @@ } } ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L99-L133" + "specLocation": "inference/_types/CommonTypes.ts#L705-L739" }, { "kind": "interface", @@ -124932,13 +124946,13 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L63-L80" + "specLocation": "inference/_types/CommonTypes.ts#L61-L78" }, { "kind": "interface", "name": { "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" }, "properties": [ { @@ -124955,13 +124969,13 @@ } } ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L135-L141" + "specLocation": "inference/_types/CommonTypes.ts#L741-L747" }, { "kind": "interface", "name": { "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "namespace": "inference._types" }, "properties": [ { @@ -125001,13 +125015,13 @@ } } ], - "specLocation": "inference/put_elser/PutElserRequest.ts#L93-L119" + "specLocation": "inference/_types/CommonTypes.ts#L759-L785" }, { "kind": "interface", "name": { "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "namespace": "inference._types" }, "properties": [ { @@ -125049,13 +125063,13 @@ } } ], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L86-L102" + "specLocation": "inference/_types/CommonTypes.ts#L795-L811" }, { "kind": "interface", "name": { "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" }, "properties": [ { @@ -125123,13 +125137,13 @@ } } ], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" + "specLocation": "inference/_types/CommonTypes.ts#L822-L848" }, { "kind": "interface", "name": { "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" }, "properties": [ { @@ -125157,13 +125171,13 @@ } } ], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129" + "specLocation": "inference/_types/CommonTypes.ts#L850-L859" }, { "kind": "interface", "name": { "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" + "namespace": "inference._types" }, "properties": [ { @@ -125205,13 +125219,13 @@ } } ], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" + "specLocation": "inference/_types/CommonTypes.ts#L870-L891" }, { "kind": "interface", "name": { "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" }, "properties": [ { @@ -125261,19 +125275,19 @@ "type": { "kind": "instance_of", "type": { - "name": "SimilarityType", - "namespace": 
"inference.put_jinaai" + "name": "JinaAISimilarityType", + "namespace": "inference._types" } } } ], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L108-L137" + "specLocation": "inference/_types/CommonTypes.ts#L901-L930" }, { "kind": "interface", "name": { "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" }, "properties": [ { @@ -125295,8 +125309,8 @@ "type": { "kind": "instance_of", "type": { - "name": "TextEmbeddingTask", - "namespace": "inference.put_jinaai" + "name": "JinaAITextEmbeddingTask", + "namespace": "inference._types" } } }, @@ -125313,13 +125327,13 @@ } } ], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L139-L160" + "specLocation": "inference/_types/CommonTypes.ts#L932-L953" }, { "kind": "interface", "name": { "name": "MistralServiceSettings", - "namespace": "inference.put_mistral" + "namespace": "inference._types" }, "properties": [ { @@ -125375,13 +125389,13 @@ } } ], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L87-L114" + "specLocation": "inference/_types/CommonTypes.ts#L977-L1004" }, { "kind": "interface", "name": { "name": "OpenAIServiceSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" }, "properties": [ { @@ -125462,13 +125476,13 @@ } } ], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136" + "specLocation": "inference/_types/CommonTypes.ts#L1014-L1056" }, { "kind": "interface", "name": { "name": "OpenAITaskSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" }, "properties": [ { @@ -125484,13 +125498,13 @@ } } ], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L138-L144" + "specLocation": "inference/_types/CommonTypes.ts#L1058-L1064" }, { "kind": "interface", "name": { "name": "VoyageAIServiceSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" }, "properties": [ { @@ -125548,13 +125562,13 @@ } } ], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L88-L119" + "specLocation": "inference/_types/CommonTypes.ts#L1076-L1107" }, { "kind": "interface", "name": { "name": "VoyageAITaskSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" }, "properties": [ { @@ -125608,13 +125622,13 @@ } } ], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L121-L145" + "specLocation": "inference/_types/CommonTypes.ts#L1109-L1133" }, { "kind": "interface", "name": { "name": "WatsonxServiceSettings", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" }, "properties": [ { @@ -125696,7 +125710,7 @@ } } ], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L80-L117" + "specLocation": "inference/_types/CommonTypes.ts#L1144-L1181" }, { "description": "Defines the response for a rerank request.", diff --git a/output/schema/schema.json b/output/schema/schema.json index ed3a49c837..d0503ea97a 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9055,7 +9055,7 @@ "name": "Request", "namespace": "inference.chat_completion_unified" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], @@ -9232,7 +9232,7 @@ "name": "Request", "namespace": "inference.post_eis_chat_completion" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], @@ -149570,43 +149570,68 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L63-L80" + "specLocation": 
"inference/_types/CommonTypes.ts#L61-L78" }, { "kind": "interface", - "description": "Defines the completion result.", "name": { - "name": "CompletionInferenceResult", + "name": "AlibabaCloudServiceSettings", "namespace": "inference._types" }, "properties": [ { - "name": "completion", + "description": "A valid API key for the AlibabaCloud AI Search API.", + "name": "api_key", "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } - } - ], - "specLocation": "inference/_types/Results.ts#L84-L89" - }, - { - "kind": "interface", - "description": "The completion result object", - "name": { - "name": "CompletionResult", - "namespace": "inference._types" - }, - "properties": [ + }, { - "name": "result", + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "extDocId": "alibabacloud-api-keys", + "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", + "name": "host", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max รท qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", + "name": "service_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the workspace used for the inference task.", + "name": "workspace", "required": true, "type": { "kind": "instance_of", @@ -149617,129 +149642,154 @@ } } ], - "specLocation": "inference/_types/Results.ts#L77-L82" + "specLocation": "inference/_types/CommonTypes.ts#L220-L265" }, { - "kind": "interface", - "description": "Acknowledged response. 
For dry_run, contains the list of pipelines which reference the inference endpoint", - "inherits": { - "type": { - "name": "AcknowledgedResponseBase", - "namespace": "_types" + "kind": "enum", + "members": [ + { + "name": "alibabacloud-ai-search" } + ], + "name": { + "name": "AlibabaCloudServiceType", + "namespace": "inference._types" }, + "specLocation": "inference/_types/CommonTypes.ts#L290-L292" + }, + { + "kind": "interface", "name": { - "name": "DeleteInferenceEndpointResult", + "name": "AlibabaCloudTaskSettings", "namespace": "inference._types" }, "properties": [ { - "name": "pipelines", - "required": true, + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", + "name": "input_type", + "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } - } - ], - "specLocation": "inference/_types/Results.ts#L110-L115" - }, - { - "kind": "type_alias", - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "name": { - "name": "DenseByteVector", - "namespace": "inference._types" - }, - "specLocation": "inference/_types/Results.ts#L47-L51", - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", + }, + { + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "name": "return_token", + "required": false, "type": { - "name": "byte", - "namespace": "_types" + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } } } - } + ], + "specLocation": "inference/_types/CommonTypes.ts#L267-L281" }, { - "kind": "type_alias", - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "sparse_embedding" + }, + { + "name": "text_embedding" + } + ], "name": { - "name": "DenseVector", + "name": "AlibabaCloudTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L30-L34", - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } + "specLocation": "inference/_types/CommonTypes.ts#L283-L288" }, { "kind": "interface", - "description": "Chunking configuration object", "name": { - "name": "InferenceChunkingSettings", + "name": "AmazonBedrockServiceSettings", "namespace": "inference._types" }, "properties": [ { - "description": "The maximum size of a chunk in words.\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).", - "name": "max_chunk_size", - "required": false, - "serverDefault": 250, + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The 
number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", - "name": "overlap", - "required": false, - "serverDefault": 100, + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", - "name": "sentence_overlap", + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", "required": false, - "serverDefault": 1, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The chunking strategy: `sentence` or `word`.", - "name": "strategy", + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.\nBy default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, "type": { "kind": "instance_of", "type": { @@ -149749,84 +149799,106 @@ } } ], - "specLocation": "inference/_types/Services.ts#L60-L89" + "specLocation": "inference/_types/CommonTypes.ts#L294-L336" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], + "name": { + "name": "AmazonBedrockServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L369-L371" },
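Likewise, a request body matching AmazonBedrockServiceSettings could look roughly like this (the keys are placeholders; region, provider, and model are illustrative):

PUT _inference/text_embedding/amazon-bedrock-embeddings
{
  "service": "amazonbedrock",
  "service_settings": {
    "access_key": "<aws-access-key>",
    "secret_key": "<aws-secret-key>",
    "region": "us-east-1",
    "provider": "amazontitan",
    "model": "amazon.titan-embed-text-v2:0"
  }
}

{ "kind": "interface", - "description": "Configuration options 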
when storing the inference endpoint", "name": { - "name": "InferenceEndpoint", + "name": "AmazonBedrockTaskSettings", "namespace": "inference._types" }, "properties": [ { - "description": "Chunking configuration object", - "name": "chunking_settings", + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "name": "max_new_tokens", "required": false, + "serverDefault": 64, "type": { "kind": "instance_of", "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" + "name": "integer", + "namespace": "_types" } } }, { - "description": "The service type", - "name": "service", - "required": true, + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "float", + "namespace": "_types" } } }, { - "description": "Settings specific to the service", - "name": "service_settings", - "required": true, + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "ServiceSettings", - "namespace": "inference._types" + "name": "float", + "namespace": "_types" } } }, { - "description": "Task settings specific to the service and task type", - "name": "task_settings", + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", "required": false, "type": { "kind": "instance_of", "type": { - "name": "TaskSettings", - "namespace": "inference._types" + "name": "float", + "namespace": "_types" } } } ], - "specLocation": "inference/_types/Services.ts#L24-L44" + "specLocation": "inference/_types/CommonTypes.ts#L338-L362" }, { - "kind": "interface", - "description": "Represents an inference endpoint as returned by the GET API", - "inherits": { - "type": { - "name": "InferenceEndpoint", - "namespace": "inference._types" + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference._types" }, + "specLocation": "inference/_types/CommonTypes.ts#L364-L367" + }, + { + "kind": "interface", "name": { - "name": "InferenceEndpointInfo", + "name": "AnthropicServiceSettings", "namespace": "inference._types" }, "properties": [ { - "description": "The inference Id", - "name": "inference_id", + "description": "A valid API key for the Anthropic API.", + "name": "api_key", "required": true, "type": { "kind": "instance_of", @@ -149837,30 +149909,56 @@ } }, { - "description": "The task type", - "name": "task_type", + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported 
models.", + "extDocId": "anothropic-models", + "name": "model_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "TaskType", + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", "namespace": "inference._types" } } } ], - "specLocation": "inference/_types/Services.ts#L46-L58" + "specLocation": "inference/_types/CommonTypes.ts#L373-L389" + }, + { + "kind": "enum", + "members": [ + { + "name": "anthropic" + } + ], + "name": { + "name": "AnthropicServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L422-L424" }, { "kind": "interface", - "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested", "name": { - "name": "RankedDocument", + "name": "AnthropicTaskSettings", "namespace": "inference._types" }, "properties": [ { - "name": "index", + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "name": "max_tokens", "required": true, "type": { "kind": "instance_of", @@ -149871,8 +149969,11 @@ } }, { - "name": "relevance_score", - "required": true, + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "extDocId": "anthropic-messages", + "extDocUrl": "https://docs.anthropic.com/en/api/messages", + "name": "temperature", + "required": false, "type": { "kind": "instance_of", "type": { @@ -149882,76 +149983,72 @@ } }, { - "name": "text", + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_k", "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "integer", + "namespace": "_types" } } - } - ], - "specLocation": "inference/_types/Results.ts#L91-L101" - }, - { - "kind": "interface", - "name": { - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ + }, { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_p", "required": false, "type": { "kind": "instance_of", "type": { - "name": "integer", + "name": "float", "namespace": "_types" } } } ], - "specLocation": "inference/_types/Services.ts#L95-L100" + "specLocation": "inference/_types/CommonTypes.ts#L391-L416" }, { - "kind": "interface", - "attachedBehaviors": [ - "CommonQueryParameters" 
- ], - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" + "kind": "enum", + "members": [ + { + "name": "completion" } + ], + "name": { + "name": "AnthropicTaskType", + "namespace": "inference._types" }, + "specLocation": "inference/_types/CommonTypes.ts#L418-L420" + }, + { + "kind": "interface", "name": { - "name": "RequestChatCompletionBase", + "name": "AzureAiStudioServiceSettings", "namespace": "inference._types" }, "properties": [ { - "description": "A list of objects representing the conversation.", - "name": "messages", + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.chat_completion_unified" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The ID of the model to use.", - "name": "model", - "required": false, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, "type": { "kind": "instance_of", "type": { @@ -149961,35 +150058,67 @@ } }, { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "long", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": 
"instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The sampling temperature to use.", - "name": "temperature", + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L426-L468" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "AzureAiStudioServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L505-L507" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", "required": false, "type": { "kind": "instance_of", @@ -150000,34 +150129,32 @@ } }, { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", "required": false, + "serverDefault": 64, "type": { "kind": "instance_of", "type": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" + "name": "integer", + "namespace": "_types" } } }, { - "description": "A list of tools that the model can call.", - "name": "tools", + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" - } + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" } } }, { - "description": "Nucleus sampling, an alternative to sampling with temperature.", + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", "name": "top_p", "required": false, "type": { @@ -150037,261 +150164,476 @@ "namespace": "_types" } } - } - ], - "specLocation": "inference/_types/CommonTypes.ts#L28-L61" - }, - { - "kind": "interface", - "description": "Defines the response for a rerank request.", - "name": { - "name": "RerankedInferenceResult", - "namespace": "inference._types" - }, - "properties": [ + }, { - "name": "rerank", - "required": true, + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "RankedDocument", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + 
"name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Results.ts#L103-L108" + "specLocation": "inference/_types/CommonTypes.ts#L470-L498" }, { - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], "name": { - "name": "ServiceSettings", + "name": "AzureAiStudioTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L91-L91", - "type": { - "kind": "user_defined_value" - } + "specLocation": "inference/_types/CommonTypes.ts#L500-L503" }, { "kind": "interface", - "description": "The response format for the sparse embedding request.", "name": { - "name": "SparseEmbeddingInferenceResult", + "name": "AzureOpenAIServiceSettings", "namespace": "inference._types" }, "properties": [ { - "name": "sparse_embedding", + "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "api_key", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", + "name": "api_version", "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", + "extDocId": "azureopenai", + "extDocUrl": "https://oai.azure.com/", + "name": "deployment_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", + "extDocId": "azureopenai-auth", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", + "extDocId": "azureopenai-quota-limits", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", + "name": "rate_limit", + "required": 
false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", + "extDocId": "azureopenai-portal", + "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", + "name": "resource_name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Results.ts#L40-L45" + "specLocation": "inference/_types/CommonTypes.ts#L509-L554" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "AzureOpenAIServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L569-L571" }, { "kind": "interface", "name": { - "name": "SparseEmbeddingResult", + "name": "AzureOpenAITaskSettings", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", - "required": true, + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "SparseVector", - "namespace": "inference._types" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Results.ts#L36-L38" + "specLocation": "inference/_types/CommonTypes.ts#L556-L562" }, { - "kind": "type_alias", - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], "name": { - "name": "SparseVector", + "name": "AzureOpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L24-L28", - "type": { - "kind": "dictionary_of", - "key": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - "singleKey": false, - "value": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } + "specLocation": "inference/_types/CommonTypes.ts#L564-L567" }, { - "kind": "type_alias", + "kind": "enum", + "members": [ + { + "name": "byte" + }, + { + "name": "float" + }, + { + "name": "int8" + } + ], "name": { - "name": "TaskSettings", + "name": "CohereEmbeddingType", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L93-L93", - "type": { - "kind": "user_defined_value" - } + "specLocation": "inference/_types/CommonTypes.ts#L626-L630" }, { "kind": "enum", "members": [ { - "name": "sparse_embedding" - }, - { - "name": "text_embedding" + "name": "classification" }, { - "name": "rerank" + "name": "clustering" }, { - "name": "completion" + "name": "ingest" }, { - "name": "chat_completion" + "name": "search" } ], "name": { - "name": "TaskType", + "name": "CohereInputType", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L20-L29" + "specLocation": "inference/_types/CommonTypes.ts#L632-L637" }, { "kind": "interface", - "description": "The text embedding result object for byte representation", "name": { - "name": "TextEmbeddingByteResult", + "name": "CohereServiceSettings", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", + "description": "A valid API key for your Cohere account.\nYou can find or create your 
Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "cohere-api-keys", + "extDocUrl": "https://dashboard.cohere.com/api-keys", + "name": "api_key", "required": true, "type": { "kind": "instance_of", "type": { - "name": "DenseByteVector", + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", + "name": "embedding_type", + "required": false, + "serverDefault": "float", + "type": { + "kind": "instance_of", + "type": { + "name": "CohereEmbeddingType", + "namespace": "inference._types" + } + } + }, + { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereSimilarityType", "namespace": "inference._types" } } } ], - "specLocation": "inference/_types/Results.ts#L53-L58" + "specLocation": "inference/_types/CommonTypes.ts#L573-L614" + }, + { + "kind": "enum", + "members": [ + { + "name": "cohere" + } + ], + "name": { + "name": "CohereServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L622-L624" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "CohereSimilarityType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L639-L643" }, { "kind": "interface", - "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "name": { - "name": "TextEmbeddingInferenceResult", + "name": "CohereTaskSettings", "namespace": "inference._types" }, "properties": [ { - "name": "text_embedding_bytes", + "description": "For a 
`text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", + "name": "input_type", "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "CohereInputType", + "namespace": "inference._types" } } }, { - "name": "text_embedding_bits", + "description": "For a `rerank` task, return doc text within the results.", + "name": "return_documents", "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" } } }, { - "name": "text_embedding", + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", + "name": "truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTruncateType", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L651-L683" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "CohereTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L616-L620" + }, + { + "kind": "enum", + "members": [ + { + "name": "END" + }, + { + "name": "NONE" + }, + { + "name": "START" + } + ], + "name": { + "name": "CohereTruncateType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L645-L649" + }, + { + "kind": "interface", + "description": "Defines the completion result.", + "name": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "completion", + "required": true, "type": { "kind": "array_of", "value": { "kind": "instance_of", "type": { - "name": "TextEmbeddingResult", + "name": "CompletionResult", "namespace": "inference._types" } } } } ], - "specLocation": "inference/_types/Results.ts#L67-L75", - "variants": { - "kind": "container" - } + "specLocation": "inference/_types/Results.ts#L84-L89" }, { "kind": "interface", - 
"description": "The text embedding result object", + "description": "The completion result object", "name": { - "name": "TextEmbeddingResult", + "name": "CompletionResult", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", + "name": "result", "required": true, "type": { "kind": "instance_of", "type": { - "name": "DenseVector", - "namespace": "inference._types" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Results.ts#L60-L65" + "specLocation": "inference/_types/Results.ts#L77-L82" }, { "kind": "interface", "description": "A list of tools that the model can call.", "name": { "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" }, "properties": [ { @@ -150314,168 +150656,1967 @@ "kind": "instance_of", "type": { "name": "CompletionToolFunction", - "namespace": "inference.chat_completion_unified" + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L206-L218" + }, + { + "kind": "interface", + "description": "Controls which tool is called by the model.", + "name": { + "name": "CompletionToolChoice", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The type of the tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool choice function.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L169-L181" + }, + { + "kind": "interface", + "description": "The tool choice function.", + "name": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L158-L167" + }, + { + "kind": "interface", + "description": "The completion tool function definition.", + "name": { + "name": "CompletionToolFunction", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "name": "description", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", + "name": "parameters", + "required": false, + "type": { + "kind": "user_defined_value" + } + }, + { + "description": "Whether to enable schema adherence when generating the function call.", + "name": "strict", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L183-L204" + }, + { + "kind": "type_alias", + "codegenNames": [ + "string", + "object" + ], + "name": { + "name": "CompletionToolType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L80-L83", + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoice", + "namespace": "inference._types" + } + } + ] + } + }, + { + "kind": "interface", + "description": "An object style representation of a single portion of a conversation.", + "name": { + "name": "ContentObject", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The text content.", + "name": "text", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of content.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L85-L97" + }, + { + "kind": "interface", + "description": "Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint", + "inherits": { + "type": { + "name": "AcknowledgedResponseBase", + "namespace": "_types" + } + }, + "name": { + "name": "DeleteInferenceEndpointResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "pipelines", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L110-L115" + }, + { + "kind": "type_alias", + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "name": { + "name": "DenseByteVector", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Results.ts#L47-L51", + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "byte", + "namespace": "_types" + } + } + } + }, + { + "kind": "type_alias", + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "name": { + "name": "DenseVector", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Results.ts#L30-L34", + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + }, + { + "kind": "interface", + "name": { + "name": "EisServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The name of the model to use for the inference task.", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of 
requests allowed per minute to `240` for the `chat_completion` task.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L685-L695" + }, + { + "kind": "enum", + "members": [ + { + "name": "elastic" + } + ], + "name": { + "name": "EisServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L701-L703" + }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + } + ], + "name": { + "name": "EisTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L697-L699" + }, + { + "kind": "interface", + "name": { + "name": "ElasticsearchServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocations", + "namespace": "inference._types" + } + } + }, + { + "description": "The deployment identifier for a trained model deployment.\nWhen `deployment_id` is used the `model_id` is optional.", + "name": "deployment_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nIt can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client.", + "extDocId": "eland-import", + "extDocUrl": "https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-import-model.html#ml-nlp-import-script", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The total number of allocations that are assigned to the model across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations are enabled, do not set this value because it's automatically set.", + "name": "num_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of threads used by each model allocation during inference.\nThis setting generally increases the speed per inference request.\nThe inference process is a compute-bound process; `num_threads` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.", + "name": "num_threads", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L705-L739" + },
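The adaptive allocations contract above is easiest to read from an example; a request against the `elasticsearch` service might look like this sketch (the endpoint name and allocation bounds are illustrative):

PUT _inference/text_embedding/my-e5-endpoint
{
  "service": "elasticsearch",
  "service_settings": {
    "adaptive_allocations": {
      "enabled": true,
      "min_number_of_allocations": 3,
      "max_number_of_allocations": 10
    },
    "num_threads": 1,
    "model_id": ".multilingual-e5-small"
  }
}

+ { + "kind": "enum", + "members": [ + { + "name": "elasticsearch" + } + ], + "name": { + "name": 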
"ElasticsearchServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L755-L757" + }, + { + "kind": "interface", + "name": { + "name": "ElasticsearchTaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `rerank` task, return the document instead of only the index.", + "name": "return_documents", + "required": false, + "serverDefault": true, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L741-L747" + }, + { + "kind": "enum", + "members": [ + { + "name": "rerank" + }, + { + "name": "sparse_embedding" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "ElasticsearchTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L749-L753" + }, + { + "kind": "interface", + "name": { + "name": "ElserServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", + "name": "adaptive_allocations", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AdaptiveAllocations", + "namespace": "inference._types" + } + } + }, + { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "name": "num_allocations", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. 
If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "name": "num_threads", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L759-L785" + }, + { + "kind": "enum", + "members": [ + { + "name": "elser" + } + ], + "name": { + "name": "ElserServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L791-L793" + }, + { + "kind": "enum", + "members": [ + { + "name": "sparse_embedding" + } + ], + "name": { + "name": "ElserTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L787-L789" + }, + { + "kind": "enum", + "members": [ + { + "name": "googleaistudio" + } + ], + "name": { + "name": "GoogleAiServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L818-L820" + }, + { + "kind": "interface", + "name": { + "name": "GoogleAiStudioServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid API key of your Google Gemini account.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "extDocId": "googleaistudio-models", + "extDocUrl": "https://ai.google.dev/gemini-api/docs/models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Google AI Studio.\nBy default, the `googleaistudio` service sets the number of requests allowed per minute to 360.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L795-L811" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "GoogleAiStudioTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L813-L816" + }, + { + "kind": "interface", + "name": { + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "extDocId": "googlevertexai-locations", + "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", + "name": "location", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "extDocId": "googlevertexai-models", + "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the project to use for the 
inference task.", + "name": "project_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Google Vertex AI.\nBy default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid service account in JSON format for the Google Vertex AI API.", + "name": "service_account_json", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L822-L848" + }, + { + "kind": "enum", + "members": [ + { + "name": "googlevertexai" + } + ], + "name": { + "name": "GoogleVertexAIServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L866-L868" + }, + { + "kind": "interface", + "name": { + "name": "GoogleVertexAITaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", + "name": "auto_truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of the top N documents that should be returned.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L850-L859" + }, + { + "kind": "enum", + "members": [ + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "GoogleVertexAITaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L861-L864" + }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + 
"name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L870-L891" + }, + { + "kind": "enum", + "members": [ + { + "name": "hugging_face" + } + ], + "name": { + "name": "HuggingFaceServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L897-L899" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "HuggingFaceTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L893-L895" + }, + { + "kind": "interface", + "description": "Chunking configuration object", + "name": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The maximum size of a chunk in words.\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).", + "name": "max_chunk_size", + "required": false, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The chunking strategy: `sentence` or `word`.", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L60-L89" + }, + { + "kind": "interface", + "description": "Configuration options when storing the inference endpoint", + "name": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Chunking configuration object", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The service type", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Settings specific to the service", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "Task settings specific to the service and task type", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L24-L44" + }, + { + "kind": "interface", + "description": "Represents an inference endpoint as returned by the GET API", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "name": { + 
"name": "InferenceEndpointInfo", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The task type", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L46-L58" + }, + { + "kind": "interface", + "name": { + "name": "JinaAIServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "jinaAi-embeddings", + "extDocUrl": "https://jina.ai/embeddings/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from JinaAI.\nBy default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types.", + "extDocId": "jinaAi-rate-limit", + "extDocUrl": "https://jina.ai/contact-sales/#rate-limit", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "For a `text_embedding` task, the similarity measure. 
One of cosine, dot_product, l2_norm.\nThe default value varies with the embedding type.\nFor example, a float embedding type uses a `dot_product` similarity measure by default.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAISimilarityType", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L901-L930" + }, + { + "kind": "enum", + "members": [ + { + "name": "jinaai" + } + ], + "name": { + "name": "JinaAIServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L960-L962" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "JinaAISimilarityType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L964-L968" + }, + { + "kind": "interface", + "name": { + "name": "JinaAITaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `rerank` task, return the doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the task passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.", + "name": "task", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "JinaAITextEmbeddingTask", + "namespace": "inference._types" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L932-L953" + }, + { + "kind": "enum", + "members": [ + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "JinaAITaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L955-L958" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "JinaAITextEmbeddingTask", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L970-L975" + }, + { + "kind": "interface", + "description": "An object representing part of the conversation.", + "name": { + "name": "Message", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The content of the message.", + "name": "content", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "MessageContent", + "namespace": "inference._types" + } + } + }, + { + "description": "The role of the message author.", + "name": "role", + "required": true, + "type": { + "kind": 
"instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool call that this message is responding to.", + "name": "tool_call_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The tool calls generated by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ToolCall", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L136-L156" + }, + { + "kind": "type_alias", + "codegenNames": [ + "string", + "object" + ], + "name": { + "name": "MessageContent", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L131-L134", + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ContentObject", + "namespace": "inference._types" + } + } + } + ] + } + }, + { + "kind": "interface", + "name": { + "name": "MistralServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "mistral-api-keys", + "extDocUrl": "https://console.mistral.ai/api-keys/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The maximum number of tokens per input before chunking occurs.", + "name": "max_input_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", + "extDocId": "mistral-api-models", + "extDocUrl": "https://docs.mistral.ai/getting-started/models/", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from the Mistral API.\nBy default, the `mistral` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L977-L1004" + }, + { + "kind": "enum", + "members": [ + { + "name": "mistral" + } + ], + "name": { + "name": "MistralServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1010-L1012" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "MistralTaskType", + "namespace": "inference._types" + }, + 
"specLocation": "inference/_types/CommonTypes.ts#L1006-L1008" + }, + { + "kind": "interface", + "name": { + "name": "OpenAIServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid API key of your OpenAI account.\nYou can find your OpenAI API keys in your OpenAI account under the API keys section.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "openai-api-keys", + "extDocUrl": "https://platform.openai.com/api-keys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of dimensions the resulting output embeddings should have.\nIt is supported only in `text-embedding-3` and later models.\nIf it is not set, the OpenAI defined default for the model is used.", + "name": "dimensions", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the OpenAI documentation for the list of available text embedding models.", + "extDocId": "openai-models", + "extDocUrl": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The unique identifier for your organization.\nYou can find the Organization ID in your OpenAI account under *Settings > Organizations*.", + "name": "organization_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from OpenAI.\nThe `openai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `3000`.\nFor `completion`, it is set to `500`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL endpoint to use for the requests.\nIt can be changed for testing purposes.", + "name": "url", + "required": false, + "serverDefault": "https://api.openai.com/v1/embeddings.", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L1014-L1056" + }, + { + "kind": "enum", + "members": [ + { + "name": "openai" + } + ], + "name": { + "name": "OpenAIServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1072-L1074" + }, + { + "kind": "interface", + "name": { + "name": "OpenAITaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + 
"namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L1058-L1064" + }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "OpenAITaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1066-L1070" + }, + { + "kind": "interface", + "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested", + "name": { + "name": "RankedDocument", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "index", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "name": "relevance_score", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "name": "text", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L91-L101" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference._types" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference._types" + } + } + }, + { + "description": "A list 
of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference._types" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" + "specLocation": "inference/_types/CommonTypes.ts#L26-L59" }, { "kind": "interface", - "description": "Controls which tool is called by the model.", + "description": "Defines the response for a rerank request.", "name": { - "name": "CompletionToolChoice", - "namespace": "inference.chat_completion_unified" + "name": "RerankedInferenceResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The type of the tool.", - "name": "type", + "name": "rerank", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "RankedDocument", + "namespace": "inference._types" + } } } - }, + } + ], + "specLocation": "inference/_types/Results.ts#L103-L108" + }, + { + "kind": "type_alias", + "name": { + "name": "ServiceSettings", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Services.ts#L91-L91", + "type": { + "kind": "user_defined_value" + } + }, + { + "kind": "interface", + "description": "The response format for the sparse embedding request.", + "name": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ { - "description": "The tool choice function.", - "name": "function", + "name": "sparse_embedding", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolChoiceFunction", - "namespace": "inference.chat_completion_unified" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingResult", + "namespace": "inference._types" + } } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" + "specLocation": "inference/_types/Results.ts#L40-L45" }, { "kind": "interface", - "description": "The tool choice function.", "name": { - "name": "CompletionToolChoiceFunction", - "namespace": "inference.chat_completion_unified" + "name": "SparseEmbeddingResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The name of the function to call.", - "name": "name", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "SparseVector", + "namespace": "inference._types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" + "specLocation": "inference/_types/Results.ts#L36-L38" + }, + { + "kind": "type_alias", + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "name": { + "name": "SparseVector", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Results.ts#L24-L28", + "type": { + "kind": "dictionary_of", + "key": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + "singleKey": false, + "value": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + 
}, + { + "kind": "type_alias", + "name": { + "name": "TaskSettings", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Services.ts#L93-L93", + "type": { + "kind": "user_defined_value" + } + }, + { + "kind": "enum", + "members": [ + { + "name": "sparse_embedding" + }, + { + "name": "text_embedding" + }, + { + "name": "rerank" + }, + { + "name": "completion" + }, + { + "name": "chat_completion" + } + ], + "name": { + "name": "TaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/TaskType.ts#L20-L29" }, { "kind": "interface", - "description": "The completion tool function definition.", + "description": "The text embedding result object for byte representation", "name": { - "name": "CompletionToolFunction", - "namespace": "inference.chat_completion_unified" + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" }, "properties": [ { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "name": "description", - "required": false, + "name": "embedding", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "DenseByteVector", + "namespace": "inference._types" } } - }, + } + ], + "specLocation": "inference/_types/Results.ts#L53-L58" + }, + { + "kind": "interface", + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", + "name": { + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ { - "description": "The name of the function.", - "name": "name", - "required": true, + "name": "text_embedding_bytes", + "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } } } }, { - "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", - "name": "parameters", + "name": "text_embedding_bits", "required": false, "type": { - "kind": "user_defined_value" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } + } } }, { - "description": "Whether to enable schema adherence when generating the function call.", - "name": "strict", + "name": "text_embedding", "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L67-L75", + "variants": { + "kind": "container" + } + }, + { + "kind": "interface", + "description": "The text embedding result object", + "name": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "embedding", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "boolean", - "namespace": "_builtins" + "name": "DenseVector", + "namespace": "inference._types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" + "specLocation": "inference/_types/Results.ts#L60-L65" }, { - "kind": "type_alias", - "codegenNames": [ - "string", - "object" - ], + "kind": "interface", + "description": "A tool call generated by the model.", "name": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" + "name": "ToolCall", + "namespace": "inference._types" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", - "type": { - "kind": "union_of", - "items": [ - { + "properties": [ + { + "description": "The identifier of the tool call.", + "name": "id", + "required": true, + "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "Id", + "namespace": "_types" } - }, - { + } + }, + { + "description": "The function that the model called.", + "name": "function", + "required": true, + "type": { "kind": "instance_of", "type": { - "name": "CompletionToolChoice", - "namespace": "inference.chat_completion_unified" + "name": "ToolCallFunction", + "namespace": "inference._types" } } - ] - } + }, + { + "description": "The type of the tool call.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L113-L129" }, { "kind": "interface", - "description": "An object style representation of a single portion of a conversation.", + "description": "The function that the model called.", "name": { - "name": "ContentObject", - "namespace": "inference.chat_completion_unified" + "name": "ToolCallFunction", + "namespace": "inference._types" }, "properties": [ { - "description": "The text content.", - "name": "text", + "description": "The arguments to call the function with in JSON format.", + "name": "arguments", "required": true, "type": { "kind": "instance_of", @@ -150486,8 +152627,8 @@ } }, { - "description": "The type of content.", - "name": "type", + "description": "The name of the function to call.", + "name": "name", "required": true, "type": { "kind": "instance_of", @@ -150498,31 +152639,34 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" + "specLocation": "inference/_types/CommonTypes.ts#L99-L111" }, { "kind": "interface", - "description": "An object 
representing part of the conversation.", "name": { - "name": "Message", - "namespace": "inference.chat_completion_unified" + "name": "VoyageAIServiceSettings", + "namespace": "inference._types" }, "properties": [ { - "description": "The content of the message.", - "name": "content", + "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "dimensions", "required": false, "type": { "kind": "instance_of", "type": { - "name": "MessageContent", - "namespace": "inference.chat_completion_unified" + "name": "integer", + "namespace": "_types" } } }, { - "description": "The role of the message author.", - "name": "role", + "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", + "extDocId": "voyageai-rerank", + "extDocUrl": "https://docs.voyageai.com/docs/reranker", + "name": "model_id", "required": true, "type": { "kind": "instance_of", @@ -150533,174 +152677,199 @@ } }, { - "description": "The tool call that this message is responding to.", - "name": "tool_call_id", + "description": "This setting helps to minimize the number of rate limit errors returned from VoyageAI.\nThe `voyageai` service sets a default number of requests allowed per minute depending on the task type.\nFor both `text_embedding` and `rerank`, it is set to `2000`.", + "name": "rate_limit", "required": false, "type": { "kind": "instance_of", "type": { - "name": "Id", - "namespace": "_types" + "name": "RateLimitSetting", + "namespace": "inference._types" } } }, { - "description": "The tool calls generated by the model.", - "name": "tool_calls", + "description": "The data type for the embeddings to be returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "embedding_type", "required": false, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "ToolCall", - "namespace": "inference.chat_completion_unified" - } + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" + "specLocation": "inference/_types/CommonTypes.ts#L1076-L1107" }, { - "kind": "type_alias", - "codegenNames": [ - "string", - "object" + "kind": "enum", + "members": [ + { + "name": "voyageai" + } ], "name": { - "name": "MessageContent", - "namespace": "inference.chat_completion_unified" + "name": "VoyageAIServiceType", + "namespace": "inference._types" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", - "type": { - "kind": "union_of", - "items": [ - { + "specLocation": "inference/_types/CommonTypes.ts#L1140-L1142" + }, + { + "kind": "interface", + "name": { + "name": "VoyageAITaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the 
VoyageAI documentation).\nOnly for the `text_embedding` task type.", + "name": "input_type", + "required": false, + "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "ContentObject", - "namespace": "inference.chat_completion_unified" - } + } + }, + { + "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", + "name": "return_documents", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" } } - ] - } - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [] - }, - "description": "Perform chat completion inference", - "inherits": { - "type": { - "name": "RequestChatCompletionBase", - "namespace": "inference._types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.chat_completion_unified" - }, - "path": [ + }, { - "description": "The inference Id", - "name": "inference_id", - "required": true, + "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", + "name": "top_k", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "Id", + "name": "integer", "namespace": "_types" } } - } - ], - "query": [ + }, { - "description": "Specifies the amount of time to wait for the inference request to complete.", - "name": "timeout", + "description": "Whether to truncate the input texts to fit within the context length.", + "name": "truncation", "required": false, - "serverDefault": "30s", + "serverDefault": true, "type": { "kind": "instance_of", "type": { - "name": "Duration", - "namespace": "_types" + "name": "boolean", + "namespace": "_builtins" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" + "specLocation": "inference/_types/CommonTypes.ts#L1109-L1133" }, { - "kind": "response", - "body": { - "kind": "value", - "codegenName": "data", - "value": { - "kind": "instance_of", - "type": { - "name": "StreamResult", - "namespace": "_types" - } + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "rerank" } - }, + ], "name": { - "name": "Response", - "namespace": "inference.chat_completion_unified" + "name": "VoyageAITaskType", + "namespace": "inference._types" }, - "specLocation": "inference/chat_completion_unified/UnifiedResponse.ts#L22-L25" + "specLocation": "inference/_types/CommonTypes.ts#L1135-L1138" }, { "kind": "interface", - "description": "A tool call generated by the model.", "name": { - "name": "ToolCall", - "namespace": "inference.chat_completion_unified" + "name": "WatsonxServiceSettings", + "namespace": "inference._types" }, "properties": [ { - "description": "The identifier of the tool call.", - "name": "id", + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + 
"extDocId": "watsonx-api-keys", + "extDocUrl": "https://cloud.ibm.com/iam/apikeys", + "name": "api_key", "required": true, "type": { "kind": "instance_of", "type": { - "name": "Id", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The function that the model called.", - "name": "function", + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", + "extDocId": "watsonx-api-version", + "extDocUrl": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates", + "name": "api_version", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ToolCallFunction", - "namespace": "inference.chat_completion_unified" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The type of the tool call.", - "name": "type", + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "extDocId": "watsonx-api-models", + "extDocUrl": "https://www.ibm.com/products/watsonx-ai/foundation-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "name": "project_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "name": "url", "required": true, "type": { "kind": "instance_of", @@ -150711,42 +152880,110 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" + "specLocation": "inference/_types/CommonTypes.ts#L1144-L1181" }, { - "kind": "interface", - "description": "The function that the model called.", + "kind": "enum", + "members": [ + { + "name": "watsonxai" + } + ], "name": { - "name": "ToolCallFunction", + "name": "WatsonxServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1187-L1189" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "WatsonxTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1183-L1185" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "value", + "codegenName": "chat_completion_request", + "value": { + "kind": "instance_of", + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + } + }, + "description": "Perform chat completion inference", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", "namespace": "inference.chat_completion_unified" }, - "properties": [ + "path": [ { - "description": "The arguments to call the function with in JSON format.", - "name": "arguments", + 
"description": "The inference Id", + "name": "inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "Id", + "namespace": "_types" } } - }, + } + ], + "query": [ { - "description": "The name of the function to call.", - "name": "name", - "required": true, + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "Duration", + "namespace": "_types" } } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L24-L53" + }, + { + "kind": "response", + "body": { + "kind": "value", + "codegenName": "data", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.chat_completion_unified" + }, + "specLocation": "inference/chat_completion_unified/UnifiedResponse.ts#L22-L25" }, { "kind": "request", @@ -151047,14 +153284,21 @@ "CommonQueryParameters" ], "body": { - "kind": "properties", - "properties": [] + "kind": "value", + "codegenName": "chat_completion_request", + "value": { + "kind": "instance_of", + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + } }, "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", "inherits": { "type": { - "name": "RequestChatCompletionBase", - "namespace": "inference._types" + "name": "RequestBase", + "namespace": "_types" } }, "name": { @@ -151076,7 +153320,7 @@ } ], "query": [], - "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L48" }, { "kind": "response", @@ -151172,134 +153416,6 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L25" }, - { - "kind": "interface", - "name": { - "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" - }, - "properties": [ - { - "description": "A valid API key for the AlibabaCloud AI Search API.", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", - "extDocId": "alibabacloud-api-keys", - "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", - "name": "host", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* 
`ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", - "name": "service_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the workspace used for the inference task.", - "name": "workspace", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" - }, - { - "kind": "interface", - "name": { - "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" - }, - "properties": [ - { - "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", - "name": "input_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", - "name": "return_token", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "rerank" - }, - { - "name": "space_embedding" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" - }, { "kind": "request", "attachedBehaviors": [ @@ -151329,8 +153445,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_alibabacloud" + "name": "AlibabaCloudServiceType", + "namespace": "inference._types" } } }, @@ -151342,7 +153458,7 @@ "kind": "instance_of", "type": { "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" + "namespace": "inference._types" } } }, @@ -151351,271 +153467,95 @@ "name": "task_settings", "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" - } - } - } - ] - }, - "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": 
\"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAlibabaCloudRequestExample1": { - "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", - "summary": "A completion task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample2": { - "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", - "summary": "A rerank task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample3": { - "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample4": { - "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.put_alibabacloud" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "alibabacloud_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" - }, - { - "kind": "response", - "body": { - "kind": "value", - "codegenName": "endpoint_info", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "name": { - "name": "Response", - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L25" - }, - { - "kind": "enum", - "members": [ - { - "name": 
"alibabacloud-ai-search" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" - }, - { - "kind": "interface", - "name": { - "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" - }, - "properties": [ - { - "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", - "name": "access_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "model", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", - "name": "provider", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "region", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", - "extDocId": "amazonbedrock-secret-keys", - "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", - "name": "secret_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" - }, - { - "kind": "interface", - "name": { - "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" - }, - "properties": [ - { - "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", - 
"name": "max_new_tokens", - "required": false, - "serverDefault": 64, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference._types" + } } } + ] + }, + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAlibabaCloudRequestExample1": { + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" }, - { - "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } + "PutAlibabaCloudRequestExample2": { + "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample3": { + "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" }, + "PutAlibabaCloudRequestExample4": { + "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + }, + "inherits": { + 
"type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_alibabacloud" + }, + "path": [ { - "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", - "name": "top_k", - "required": false, + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "AlibabaCloudTaskType", + "namespace": "inference._types" } } }, { - "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", - "name": "top_p", - "required": false, + "description": "The unique identifier of the inference endpoint.", + "name": "alibabacloud_inference_id", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", + "name": "Id", "namespace": "_types" } } } ], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + "query": [], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L30-L83" }, { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "text_embedding" + "kind": "response", + "body": { + "kind": "value", + "codegenName": "endpoint_info", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } } - ], + }, "name": { - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "name": "Response", + "namespace": "inference.put_alibabacloud" }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L25" }, { "kind": "request", @@ -151646,8 +153586,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_amazonbedrock" + "name": "AmazonBedrockServiceType", + "namespace": "inference._types" } } }, @@ -151659,7 +153599,7 @@ "kind": "instance_of", "type": { "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } }, @@ -151671,7 +153611,7 @@ "kind": "instance_of", "type": { "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } } @@ -151709,7 +153649,7 @@ "kind": "instance_of", "type": { "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference._types" } } }, @@ -151727,7 +153667,7 @@ } ], "query": [], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L30-L86" }, { "kind": "response", @@ -151748,139 +153688,6 @@ }, "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "amazonbedrock" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_amazonbedrock" - }, - 
"specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" - }, - { - "kind": "interface", - "name": { - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" - }, - "properties": [ - { - "description": "A valid API key for the Anthropic API.", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", - "extDocId": "anothropic-models", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" - }, - { - "kind": "interface", - "name": { - "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" - }, - "properties": [ - { - "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", - "name": "max_tokens", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", - "extDocId": "anthropic-messages", - "extDocUrl": "https://docs.anthropic.com/en/api/messages", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", - "name": "top_k", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - } - ], - "name": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" - }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" - }, { "kind": "request", "attachedBehaviors": [ @@ -151910,8 +153717,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_anthropic" 
+ "name": "AnthropicServiceType", + "namespace": "inference._types" } } }, @@ -151923,7 +153730,7 @@ "kind": "instance_of", "type": { "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } }, @@ -151935,7 +153742,7 @@ "kind": "instance_of", "type": { "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } } @@ -151967,7 +153774,7 @@ "kind": "instance_of", "type": { "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "namespace": "inference._types" } } }, @@ -151985,7 +153792,7 @@ } ], "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L30-L84" }, { "kind": "response", @@ -152006,180 +153813,6 @@ }, "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "anthropic" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_anthropic" - }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" - }, - { - "kind": "interface", - "name": { - "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" - }, - "properties": [ - { - "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "azureaistudio-api-keys", - "extDocUrl": "https://ai.azure.com/", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", - "extDocId": "azureaistudio-endpoint-types", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", - "name": "endpoint_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", - "name": "target", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - 
available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", - "name": "provider", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" - }, - { - "kind": "interface", - "name": { - "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" - }, - "properties": [ - { - "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", - "name": "do_sample", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", - "name": "max_new_tokens", - "required": false, - "serverDefault": 64, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", - "name": "user", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" - }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" - }, { "kind": "request", "attachedBehaviors": [ @@ -152209,8 +153842,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_azureaistudio" + "name": "AzureAiStudioServiceType", + "namespace": "inference._types" } } }, @@ -152222,7 +153855,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" } } }, @@ -152234,7 +153867,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioTaskSettings", - "namespace": 
"inference.put_azureaistudio" + "namespace": "inference._types" } } } @@ -152272,7 +153905,7 @@ "kind": "instance_of", "type": { "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" + "namespace": "inference._types" } } }, @@ -152290,7 +153923,7 @@ } ], "query": [], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L30-L83" }, { "kind": "response", @@ -152311,149 +153944,6 @@ }, "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "azureaistudio" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_azureaistudio" - }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" - }, - { - "kind": "interface", - "name": { - "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" - }, - "properties": [ - { - "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "api_key", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", - "name": "api_version", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", - "extDocId": "azureopenai", - "extDocUrl": "https://oai.azure.com/", - "name": "deployment_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "entra_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", - "extDocId": "azureopenai-quota-limits", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", - "name": "rate_limit", - "required": false, 
- "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", - "extDocId": "azureopenai-portal", - "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", - "name": "resource_name", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" - }, - { - "kind": "interface", - "name": { - "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" - }, - "properties": [ - { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", - "name": "user", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" - }, { "kind": "request", "attachedBehaviors": [ @@ -152483,8 +153973,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" + "name": "AzureOpenAIServiceType", + "namespace": "inference._types" } } }, @@ -152496,7 +153986,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } }, @@ -152508,7 +153998,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } } @@ -152546,7 +154036,7 @@ "kind": "instance_of", "type": { "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" + "namespace": "inference._types" } } }, @@ -152564,7 +154054,7 @@ } ], "query": [], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L30-L91" }, { "kind": "response", @@ -152585,210 +154075,6 @@ }, "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "azureopenai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" - }, - { - "kind": "interface", - "name": { - "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" - }, - "properties": [ - { - "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "cohere-api-keys", - "extDocUrl": 
"https://dashboard.cohere.com/api-keys", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", - "name": "embedding_type", - "required": false, - "serverDefault": "float", - "type": { - "kind": "instance_of", - "type": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" - } - } - }, - { - "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", - "name": "model_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", - "name": "similarity", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" - } - } - } - ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" - }, - { - "kind": "interface", - "name": { - "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" - }, - "properties": [ - { - "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", - "name": "input_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InputType", - "namespace": "inference.put_cohere" - } - } - }, - { - "description": "For a `rerank` task, return doc text within the results.", - "name": "return_documents", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or 
equal to `rank_window_size` in the query.", - "name": "top_n", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", - "name": "truncate", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TruncateType", - "namespace": "inference.put_cohere" - } - } - } - ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "rerank" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "CohereTaskType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88" - }, - { - "kind": "enum", - "members": [ - { - "name": "byte" - }, - { - "name": "float" - }, - { - "name": "int8" - } - ], - "name": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" - }, - { - "kind": "enum", - "members": [ - { - "name": "classification" - }, - { - "name": "clustering" - }, - { - "name": "ingest" - }, - { - "name": "search" - } - ], - "name": { - "name": "InputType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" - }, { "kind": "request", "attachedBehaviors": [ @@ -152818,8 +154104,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_cohere" + "name": "CohereServiceType", + "namespace": "inference._types" } } }, @@ -152831,7 +154117,7 @@ "kind": "instance_of", "type": { "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" + "namespace": "inference._types" } } }, @@ -152843,7 +154129,7 @@ "kind": "instance_of", "type": { "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" + "namespace": "inference._types" } } } @@ -152881,7 +154167,7 @@ "kind": "instance_of", "type": { "name": "CohereTaskType", - "namespace": "inference.put_cohere" + "namespace": "inference._types" } } }, @@ -152899,7 +154185,7 @@ } ], "query": [], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L30-L84" }, { "kind": "response", @@ -152920,104 +154206,6 @@ }, "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "cohere" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" - }, - { - "kind": "enum", - "members": [ - { - "name": "cosine" - }, - { - "name": "dot_product" - }, - { - "name": "l2_norm" - } - ], - "name": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" - }, - { - "kind": "enum", - "members": [ - { - "name": "END" - }, - { - "name": "NONE" - }, - { - "name": "START" - } - ], - "name": { - "name": "TruncateType", - "namespace": "inference.put_cohere" - }, - "specLocation": 
"inference/put_cohere/PutCohereRequest.ts#L113-L117" - }, - { - "kind": "interface", - "name": { - "name": "EisServiceSettings", - "namespace": "inference.put_eis" - }, - "properties": [ - { - "description": "The name of the model to use for the inference task.", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" - }, - { - "kind": "enum", - "members": [ - { - "name": "chat_completion" - } - ], - "name": { - "name": "EisTaskType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" - }, { "kind": "request", "attachedBehaviors": [ @@ -153033,8 +154221,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_eis" + "name": "EisServiceType", + "namespace": "inference._types" } } }, @@ -153046,7 +154234,7 @@ "kind": "instance_of", "type": { "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "namespace": "inference._types" } } } @@ -153072,7 +154260,7 @@ "kind": "instance_of", "type": { "name": "EisTaskType", - "namespace": "inference.put_eis" + "namespace": "inference._types" } } }, @@ -153090,7 +154278,7 @@ } ], "query": [], - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + "specLocation": "inference/put_eis/PutEisRequest.ts#L28-L66" }, { "kind": "response", @@ -153111,133 +154299,6 @@ }, "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "elastic" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" - }, - { - "kind": "interface", - "name": { - "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" - }, - "properties": [ - { - "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", - "name": "adaptive_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AdaptiveAllocations", - "namespace": "inference._types" - } - } - }, - { - "description": "The deployment identifier for a trained model deployment.\nWhen `deployment_id` is used the `model_id` is optional.", - "name": "deployment_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nIt can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client.", 
- "extDocId": "eland-import", - "extDocUrl": "https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-import-model.html#ml-nlp-import-script", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The total number of allocations that are assigned to the model across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations are enabled, do not set this value because it's automatically set.", - "name": "num_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The number of threads used by each model allocation during inference.\nThis setting generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.", - "name": "num_threads", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L99-L133" - }, - { - "kind": "interface", - "name": { - "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" - }, - "properties": [ - { - "description": "For a `rerank` task, return the document instead of only the index.", - "name": "return_documents", - "required": false, - "serverDefault": true, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L135-L141" - }, - { - "kind": "enum", - "members": [ - { - "name": "rerank" - }, - { - "name": "sparse_embedding" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L89-L93" - }, { "kind": "request", "attachedBehaviors": [ @@ -153267,8 +154328,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" + "name": "ElasticsearchServiceType", + "namespace": "inference._types" } } }, @@ -153280,7 +154341,7 @@ "kind": "instance_of", "type": { "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" } } }, @@ -153292,7 +154353,7 @@ "kind": "instance_of", "type": { "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" + "namespace": "inference._types" } } } @@ -153335,137 +154396,65 @@ "type": { "name": "RequestBase", "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.put_elasticsearch" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.\nThe must not match the `model_id`.", - "name": "elasticsearch_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - 
"query": [], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L26-L87" - }, - { - "kind": "response", - "body": { - "kind": "value", - "codegenName": "endpoint_info", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "examples": { - "PutElasticsearchResponseExample1": { - "description": "A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n", - "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" - } - }, - "name": { - "name": "Response", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchResponse.ts#L22-L25" - }, - { - "kind": "enum", - "members": [ - { - "name": "elasticsearch" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" + } }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L95-L97" - }, - { - "kind": "interface", "name": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "name": "Request", + "namespace": "inference.put_elasticsearch" }, - "properties": [ - { - "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", - "name": "adaptive_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AdaptiveAllocations", - "namespace": "inference._types" - } - } - }, + "path": [ { - "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", - "name": "num_allocations", + "description": "The type of the inference task that the model will perform.", + "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "ElasticsearchTaskType", + "namespace": "inference._types" } } }, { - "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. 
If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", - "name": "num_threads", + "description": "The unique identifier of the inference endpoint.\nIt must not match the `model_id`.", + "name": "elasticsearch_inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "integer", + "name": "Id", "namespace": "_types" } } } ], - "specLocation": "inference/put_elser/PutElserRequest.ts#L93-L119" + "query": [], + "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L30-L91" }, { - "kind": "enum", - "members": [ - { - "name": "sparse_embedding" + "kind": "response", + "body": { + "kind": "value", + "codegenName": "endpoint_info", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } } - ], + }, + "examples": { + "PutElasticsearchResponseExample1": { + "description": "A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n", + "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" + } + }, "name": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" + "name": "Response", + "namespace": "inference.put_elasticsearch" }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L85-L87" + "specLocation": "inference/put_elasticsearch/PutElasticsearchResponse.ts#L22-L25" }, { "kind": "request", @@ -153496,8 +154485,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_elser" + "name": "ElserServiceType", + "namespace": "inference._types" } } }, @@ -153509,7 +154498,7 @@ "kind": "instance_of", "type": { "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "namespace": "inference._types" } } } @@ -153551,7 +154540,7 @@ "kind": "instance_of", "type": { "name": "ElserTaskType", - "namespace": "inference.put_elser" + "namespace": "inference._types" } } }, @@ -153569,7 +154558,7 @@ } ], "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L26-L83" + "specLocation": "inference/put_elser/PutElserRequest.ts#L29-L86" }, { "kind": "response", @@ -153596,83 +154585,6 @@ }, "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "elser" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_elser" - }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L89-L91" - }, - { - "kind": "interface", - "name": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" - }, - "properties": [ - { - "description": "A valid API key of your Google Gemini account.", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", - "extDocId": "googleaistudio-models", - "extDocUrl": "https://ai.google.dev/gemini-api/docs/models", - "name": "model_id", - 
"required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Google AI Studio.\nBy default, the `googleaistudio` service sets the number of requests allowed per minute to 360.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L86-L102" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" - }, { "kind": "request", "attachedBehaviors": [ @@ -153702,8 +154614,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" + "name": "GoogleAiServiceType", + "namespace": "inference._types" } } }, @@ -153715,7 +154627,7 @@ "kind": "instance_of", "type": { "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "namespace": "inference._types" } } } @@ -153748,7 +154660,7 @@ "kind": "instance_of", "type": { "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" + "namespace": "inference._types" } } }, @@ -153766,7 +154678,7 @@ } ], "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L29-L77" }, { "kind": "response", @@ -153787,143 +154699,6 @@ }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "googleaistudio" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" - }, - { - "kind": "interface", - "name": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" - }, - "properties": [ - { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", - "extDocId": "googlevertexai-locations", - "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", - "name": "location", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", - "extDocId": "googlevertexai-models", - "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the project to use for the inference task.", - "name": "project_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Google Vertex AI.\nBy default, the `googlevertexai` 
service sets the number of requests allowed per minute to 30,000.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "A valid service account in JSON format for the Google Vertex AI API.", - "name": "service_account_json", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" - }, - { - "kind": "interface", - "name": { - "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" - }, - "properties": [ - { - "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", - "name": "auto_truncate", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `rerank` task, the number of the top N documents that should be returned.", - "name": "top_n", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129" - }, - { - "kind": "enum", - "members": [ - { - "name": "rerank" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" - }, { "kind": "request", "attachedBehaviors": [ @@ -153953,8 +154728,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" + "name": "GoogleVertexAIServiceType", + "namespace": "inference._types" } } }, @@ -153966,7 +154741,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } }, @@ -153978,7 +154753,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } } @@ -154016,7 +154791,7 @@ "kind": "instance_of", "type": { "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" + "namespace": "inference._types" } } }, @@ -154034,7 +154809,7 @@ } ], "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L30-L83" }, { "kind": "response", @@ -154055,80 +154830,6 @@ }, "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "googlevertexai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" - }, - { - "kind": "interface", - "name": { - "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" - }, - "properties": [ - { - "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating 
the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "huggingface-tokens", - "extDocUrl": "https://huggingface.co/settings/tokens", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The URL endpoint to use for the requests.", - "name": "url", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" - }, - { - "kind": "enum", - "members": [ - { - "name": "text_embedding" - } - ], - "name": { - "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" - }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L91-L93" - }, { "kind": "request", "attachedBehaviors": [ @@ -154158,8 +154859,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" + "name": "HuggingFaceServiceType", + "namespace": "inference._types" } } }, @@ -154171,7 +154872,7 @@ "kind": "instance_of", "type": { "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" + "namespace": "inference._types" } } } @@ -154204,7 +154905,7 @@ "kind": "instance_of", "type": { "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" + "namespace": "inference._types" } } }, @@ -154222,7 +154923,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L91" }, { "kind": "response", @@ -154243,143 +154944,6 @@ }, "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "hugging_face" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" - }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L95-L97" - }, - { - "kind": "interface", - "name": { - "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" - }, - "properties": [ - { - "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "jinaAi-embeddings", - "extDocUrl": "https://jina.ai/embeddings/", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", - "name": 
"model_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from JinaAI.\nBy default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types.", - "extDocId": "jinaAi-rate-limit", - "extDocUrl": "https://jina.ai/contact-sales/#rate-limit", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm.\nThe default values varies with the embedding type.\nFor example, a float embedding type uses a `dot_product` similarity measure by default.", - "name": "similarity", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "SimilarityType", - "namespace": "inference.put_jinaai" - } - } - } - ], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L108-L137" - }, - { - "kind": "interface", - "name": { - "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" - }, - "properties": [ - { - "description": "For a `rerank` task, return the doc text within the results.", - "name": "return_documents", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `text_embedding` task, the task passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.", - "name": "task", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingTask", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", - "name": "top_n", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L139-L160" - }, - { - "kind": "enum", - "members": [ - { - "name": "rerank" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L86-L89" - }, { "kind": "request", "attachedBehaviors": [ @@ -154409,8 +154973,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" + "name": "JinaAIServiceType", + "namespace": "inference._types" } } }, @@ -154422,7 +154986,7 @@ "kind": "instance_of", "type": { "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } }, @@ -154434,7 +154998,7 @@ "kind": "instance_of", "type": { "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } } @@ -154472,7 +155036,7 @@ 
"kind": "instance_of", "type": { "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" + "namespace": "inference._types" } } }, @@ -154490,7 +155054,7 @@ } ], "query": [], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L30-L86" }, { "kind": "response", @@ -154511,135 +155075,6 @@ }, "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "jinaai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L91-L93" - }, - { - "kind": "enum", - "members": [ - { - "name": "cosine" - }, - { - "name": "dot_product" - }, - { - "name": "l2_norm" - } - ], - "name": { - "name": "SimilarityType", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L95-L99" - }, - { - "kind": "enum", - "members": [ - { - "name": "classification" - }, - { - "name": "clustering" - }, - { - "name": "ingest" - }, - { - "name": "search" - } - ], - "name": { - "name": "TextEmbeddingTask", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L101-L106" - }, - { - "kind": "interface", - "name": { - "name": "MistralServiceSettings", - "namespace": "inference.put_mistral" - }, - "properties": [ - { - "description": "A valid API key of your Mistral account.\nYou can find your Mistral API keys or you can create a new one on the API Keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "mistral-api-keys", - "extDocUrl": "https://console.mistral.ai/api-keys/", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The maximum number of tokens per input before chunking occurs.", - "name": "max_input_tokens", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the Mistral models documentation for the list of available text embedding models.", - "extDocId": "mistral-api-models", - "extDocUrl": "https://docs.mistral.ai/getting-started/models/", - "name": "model", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from the Mistral API.\nBy default, the `mistral` service sets the number of requests allowed per minute to 240.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L87-L114" - }, - { - "kind": "enum", - "members": [ - { - "name": "text_embedding" - } - ], - "name": { - "name": "MistralTaskType", - "namespace": "inference.put_mistral" - }, - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L79-L81" - }, { "kind": 
"request", "attachedBehaviors": [ @@ -154669,8 +155104,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_mistral" + "name": "MistralServiceType", + "namespace": "inference._types" } } }, @@ -154682,7 +155117,7 @@ "kind": "instance_of", "type": { "name": "MistralServiceSettings", - "namespace": "inference.put_mistral" + "namespace": "inference._types" } } } @@ -154714,7 +155149,7 @@ "kind": "instance_of", "type": { "name": "MistralTaskType", - "namespace": "inference.put_mistral" + "namespace": "inference._types" } } }, @@ -154732,7 +155167,7 @@ } ], "query": [], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L29-L78" }, { "kind": "response", @@ -154753,147 +155188,6 @@ }, "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "mistral" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_mistral" - }, - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" - }, - { - "kind": "interface", - "name": { - "name": "OpenAIServiceSettings", - "namespace": "inference.put_openai" - }, - "properties": [ - { - "description": "A valid API key of your OpenAI account.\nYou can find your OpenAI API keys in your OpenAI account under the API keys section.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "openai-api-keys", - "extDocUrl": "https://platform.openai.com/api-keys", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The number of dimensions the resulting output embeddings should have.\nIt is supported only in `text-embedding-3` and later models.\nIf it is not set, the OpenAI defined default for the model is used.", - "name": "dimensions", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the OpenAI documentation for the list of available text embedding models.", - "extDocId": "openai-models", - "extDocUrl": "https://platform.openai.com/docs/guides/embeddings/what-are-embeddings", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The unique identifier for your organization.\nYou can find the Organization ID in your OpenAI account under *Settings > Organizations*.", - "name": "organization_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from OpenAI.\nThe `openai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `3000`.\nFor `completion`, it is set to `500`.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", 
- "namespace": "inference._types" - } - } - }, - { - "description": "The URL endpoint to use for the requests.\nIt can be changed for testing purposes.", - "name": "url", - "required": false, - "serverDefault": "https://api.openai.com/v1/embeddings.", - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136" - }, - { - "kind": "interface", - "name": { - "name": "OpenAITaskSettings", - "namespace": "inference.put_openai" - }, - "properties": [ - { - "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", - "name": "user", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L138-L144" - }, - { - "kind": "enum", - "members": [ - { - "name": "chat_completion" - }, - { - "name": "completion" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "OpenAITaskType", - "namespace": "inference.put_openai" - }, - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L84-L88" - }, { "kind": "request", "attachedBehaviors": [ @@ -154923,8 +155217,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_openai" + "name": "OpenAIServiceType", + "namespace": "inference._types" } } }, @@ -154936,7 +155230,7 @@ "kind": "instance_of", "type": { "name": "OpenAIServiceSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } }, @@ -154948,7 +155242,7 @@ "kind": "instance_of", "type": { "name": "OpenAITaskSettings", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } } @@ -154986,7 +155280,7 @@ "kind": "instance_of", "type": { "name": "OpenAITaskType", - "namespace": "inference.put_openai" + "namespace": "inference._types" } } }, @@ -155004,7 +155298,7 @@ } ], "query": [], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L30-L84" }, { "kind": "response", @@ -155025,19 +155319,6 @@ }, "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "openai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_openai" - }, - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L90-L92" - }, { "kind": "request", "attachedBehaviors": [ @@ -155067,8 +155348,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_voyageai" + "name": "VoyageAIServiceType", + "namespace": "inference._types" } } }, @@ -155080,7 +155361,7 @@ "kind": "instance_of", "type": { "name": "VoyageAIServiceSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } }, @@ -155092,7 +155373,7 @@ "kind": "instance_of", "type": { "name": "VoyageAITaskSettings", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } } @@ -155130,7 +155411,7 @@ "kind": "instance_of", "type": { "name": "VoyageAITaskType", - "namespace": "inference.put_voyageai" + "namespace": "inference._types" } } }, @@ -155148,7 +155429,7 @@ } ], "query": [], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L28-L77" + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L30-L79" }, { "kind": "response", @@ -155169,159 +155450,6 @@ }, 
"specLocation": "inference/put_voyageai/PutVoyageAIResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "voyageai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_voyageai" - }, - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L84-L86" - }, - { - "kind": "interface", - "name": { - "name": "VoyageAIServiceSettings", - "namespace": "inference.put_voyageai" - }, - "properties": [ - { - "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", - "extDocId": "voyageai-embeddings", - "extDocUrl": "https://docs.voyageai.com/docs/embeddings", - "name": "dimensions", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", - "extDocId": "voyageai-rerank", - "extDocUrl": "https://docs.voyageai.com/docs/reranker", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from VoyageAI.\nThe `voyageai` service sets a default number of requests allowed per minute depending on the task type.\nFor both `text_embedding` and `rerank`, it is set to `2000`.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The data type for the embeddings to be returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", - "extDocId": "voyageai-embeddings", - "extDocUrl": "https://docs.voyageai.com/docs/embeddings", - "name": "embedding_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L88-L119" - }, - { - "kind": "interface", - "name": { - "name": "VoyageAITaskSettings", - "namespace": "inference.put_voyageai" - }, - "properties": [ - { - "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation).\nOnly for the `text_embedding` task type.", - "name": "input_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", - "name": "return_documents", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", - "name": "top_k", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - 
"namespace": "_types" - } - } - }, - { - "description": "Whether to truncate the input texts to fit within the context length.", - "name": "truncation", - "required": false, - "serverDefault": true, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L121-L145" - }, - { - "kind": "enum", - "members": [ - { - "name": "text_embedding" - }, - { - "name": "rerank" - } - ], - "name": { - "name": "VoyageAITaskType", - "namespace": "inference.put_voyageai" - }, - "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L79-L82" - }, { "kind": "request", "attachedBehaviors": [ @@ -155337,8 +155465,8 @@ "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_watsonx" + "name": "WatsonxServiceType", + "namespace": "inference._types" } } }, @@ -155350,7 +155478,7 @@ "kind": "instance_of", "type": { "name": "WatsonxServiceSettings", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" } } } @@ -155382,7 +155510,7 @@ "kind": "instance_of", "type": { "name": "WatsonxTaskType", - "namespace": "inference.put_watsonx" + "namespace": "inference._types" } } }, @@ -155400,7 +155528,7 @@ } ], "query": [], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L28-L74" }, { "kind": "response", @@ -155421,120 +155549,6 @@ }, "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L25" }, - { - "kind": "enum", - "members": [ - { - "name": "watsonxai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_watsonx" - }, - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L76-L78" - }, - { - "kind": "interface", - "name": { - "name": "WatsonxServiceSettings", - "namespace": "inference.put_watsonx" - }, - "properties": [ - { - "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "watsonx-api-keys", - "extDocUrl": "https://cloud.ibm.com/iam/apikeys", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", - "extDocId": "watsonx-api-version", - "extDocUrl": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates", - "name": "api_version", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", - "extDocId": "watsonx-api-models", - "extDocUrl": "https://www.ibm.com/products/watsonx-ai/foundation-models", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", 
- "namespace": "_builtins" - } - } - }, - { - "description": "The identifier of the IBM Cloud project to use for the inference task.", - "name": "project_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The URL of the inference endpoint that you created on Watsonx.", - "name": "url", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L80-L117" - }, - { - "kind": "enum", - "members": [ - { - "name": "text_embedding" - } - ], - "name": { - "name": "WatsonxTaskType", - "namespace": "inference.put_watsonx" - }, - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L72-L74" - }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index d0ddbb2dc7..7d5e82ba29 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13070,6 +13070,124 @@ export interface InferenceAdaptiveAllocations { min_number_of_allocations?: integer } +export interface InferenceAlibabaCloudServiceSettings { + api_key: string + host: string + rate_limit?: InferenceRateLimitSetting + service_id: string + workspace: string +} + +export type InferenceAlibabaCloudServiceType = 'alibabacloud-ai-search' + +export interface InferenceAlibabaCloudTaskSettings { + input_type?: string + return_token?: boolean +} + +export type InferenceAlibabaCloudTaskType = 'completion' | 'rerank' | 'space_embedding' | 'text_embedding' + +export interface InferenceAmazonBedrockServiceSettings { + access_key: string + model: string + provider?: string + region: string + rate_limit?: InferenceRateLimitSetting + secret_key: string +} + +export type InferenceAmazonBedrockServiceType = 'amazonbedrock' + +export interface InferenceAmazonBedrockTaskSettings { + max_new_tokens?: integer + temperature?: float + top_k?: float + top_p?: float +} + +export type InferenceAmazonBedrockTaskType = 'completion' | 'text_embedding' + +export interface InferenceAnthropicServiceSettings { + api_key: string + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceAnthropicServiceType = 'anthropic' + +export interface InferenceAnthropicTaskSettings { + max_tokens: integer + temperature?: float + top_k?: integer + top_p?: float +} + +export type InferenceAnthropicTaskType = 'completion' + +export interface InferenceAzureAiStudioServiceSettings { + api_key: string + endpoint_type: string + target: string + provider: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceAzureAiStudioServiceType = 'azureaistudio' + +export interface InferenceAzureAiStudioTaskSettings { + do_sample?: float + max_new_tokens?: integer + temperature?: float + top_p?: float + user?: string +} + +export type InferenceAzureAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferenceAzureOpenAIServiceSettings { + api_key?: string + api_version: string + deployment_id: string + entra_id?: string + rate_limit?: InferenceRateLimitSetting 
+ resource_name: string +} + +export type InferenceAzureOpenAIServiceType = 'azureopenai' + +export interface InferenceAzureOpenAITaskSettings { + user?: string +} + +export type InferenceAzureOpenAITaskType = 'completion' | 'text_embedding' + +export type InferenceCohereEmbeddingType = 'byte' | 'float' | 'int8' + +export type InferenceCohereInputType = 'classification' | 'clustering' | 'ingest' | 'search' + +export interface InferenceCohereServiceSettings { + api_key: string + embedding_type?: InferenceCohereEmbeddingType + model_id?: string + rate_limit?: InferenceRateLimitSetting + similarity?: InferenceCohereSimilarityType +} + +export type InferenceCohereServiceType = 'cohere' + +export type InferenceCohereSimilarityType = 'cosine' | 'dot_product' | 'l2_norm' + +export interface InferenceCohereTaskSettings { + input_type?: InferenceCohereInputType + return_documents?: boolean + top_n?: integer + truncate?: InferenceCohereTruncateType +} + +export type InferenceCohereTaskType = 'completion' | 'rerank' | 'text_embedding' + +export type InferenceCohereTruncateType = 'END' | 'NONE' | 'START' + export interface InferenceCompletionInferenceResult { completion: InferenceCompletionResult[] } @@ -13078,6 +13196,34 @@ export interface InferenceCompletionResult { result: string } +export interface InferenceCompletionTool { + type: string + function: InferenceCompletionToolFunction +} + +export interface InferenceCompletionToolChoice { + type: string + function: InferenceCompletionToolChoiceFunction +} + +export interface InferenceCompletionToolChoiceFunction { + name: string +} + +export interface InferenceCompletionToolFunction { + description?: string + name: string + parameters?: any + strict?: boolean +} + +export type InferenceCompletionToolType = string | InferenceCompletionToolChoice + +export interface InferenceContentObject { + text: string + type: string +} + export interface InferenceDeleteInferenceEndpointResult extends AcknowledgedResponseBase { pipelines: string[] } @@ -13086,6 +13232,78 @@ export type InferenceDenseByteVector = byte[] export type InferenceDenseVector = float[] +export interface InferenceEisServiceSettings { + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceEisServiceType = 'elastic' + +export type InferenceEisTaskType = 'chat_completion' + +export interface InferenceElasticsearchServiceSettings { + adaptive_allocations?: InferenceAdaptiveAllocations + deployment_id?: string + model_id: string + num_allocations?: integer + num_threads: integer +} + +export type InferenceElasticsearchServiceType = 'elasticsearch' + +export interface InferenceElasticsearchTaskSettings { + return_documents?: boolean +} + +export type InferenceElasticsearchTaskType = 'rerank' | 'sparse_embedding' | 'text_embedding' + +export interface InferenceElserServiceSettings { + adaptive_allocations?: InferenceAdaptiveAllocations + num_allocations: integer + num_threads: integer +} + +export type InferenceElserServiceType = 'elser' + +export type InferenceElserTaskType = 'sparse_embedding' + +export type InferenceGoogleAiServiceType = 'googleaistudio' + +export interface InferenceGoogleAiStudioServiceSettings { + api_key: string + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferenceGoogleVertexAIServiceSettings { + location: string + model_id: string + project_id: string + rate_limit?: InferenceRateLimitSetting + service_account_json: string 
+} + +export type InferenceGoogleVertexAIServiceType = 'googlevertexai' + +export interface InferenceGoogleVertexAITaskSettings { + auto_truncate?: boolean + top_n?: integer +} + +export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' + +export interface InferenceHuggingFaceServiceSettings { + api_key: string + rate_limit?: InferenceRateLimitSetting + url: string +} + +export type InferenceHuggingFaceServiceType = 'hugging_face' + +export type InferenceHuggingFaceTaskType = 'text_embedding' + export interface InferenceInferenceChunkingSettings { max_chunk_size?: integer overlap?: integer @@ -13105,6 +13323,64 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi task_type: InferenceTaskType } +export interface InferenceJinaAIServiceSettings { + api_key: string + model_id?: string + rate_limit?: InferenceRateLimitSetting + similarity?: InferenceJinaAISimilarityType +} + +export type InferenceJinaAIServiceType = 'jinaai' + +export type InferenceJinaAISimilarityType = 'cosine' | 'dot_product' | 'l2_norm' + +export interface InferenceJinaAITaskSettings { + return_documents?: boolean + task?: InferenceJinaAITextEmbeddingTask + top_n?: integer +} + +export type InferenceJinaAITaskType = 'rerank' | 'text_embedding' + +export type InferenceJinaAITextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search' + +export interface InferenceMessage { + content?: InferenceMessageContent + role: string + tool_call_id?: Id + tool_calls?: InferenceToolCall[] +} + +export type InferenceMessageContent = string | InferenceContentObject[] + +export interface InferenceMistralServiceSettings { + api_key: string + max_input_tokens?: integer + model: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceMistralServiceType = 'mistral' + +export type InferenceMistralTaskType = 'text_embedding' + +export interface InferenceOpenAIServiceSettings { + api_key: string + dimensions?: integer + model_id: string + organization_id?: string + rate_limit?: InferenceRateLimitSetting + url?: string +} + +export type InferenceOpenAIServiceType = 'openai' + +export interface InferenceOpenAITaskSettings { + user?: string +} + +export type InferenceOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding' + export interface InferenceRankedDocument { index: integer relevance_score: float @@ -13116,13 +13392,13 @@ export interface InferenceRateLimitSetting { } export interface InferenceRequestChatCompletionBase extends RequestBase { - messages: InferenceChatCompletionUnifiedMessage[] + messages: InferenceMessage[] model?: string max_completion_tokens?: long stop?: string[] temperature?: float - tool_choice?: InferenceChatCompletionUnifiedCompletionToolType - tools?: InferenceChatCompletionUnifiedCompletionTool[] + tool_choice?: InferenceCompletionToolType + tools?: InferenceCompletionTool[] top_p?: float } @@ -13160,61 +13436,56 @@ export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } -export interface InferenceChatCompletionUnifiedCompletionTool { - type: string - function: InferenceChatCompletionUnifiedCompletionToolFunction -} - -export interface InferenceChatCompletionUnifiedCompletionToolChoice { +export interface InferenceToolCall { + id: Id + function: InferenceToolCallFunction type: string - function: InferenceChatCompletionUnifiedCompletionToolChoiceFunction } -export interface InferenceChatCompletionUnifiedCompletionToolChoiceFunction { +export interface InferenceToolCallFunction { + arguments: string name: 
string } -export interface InferenceChatCompletionUnifiedCompletionToolFunction { - description?: string - name: string - parameters?: any - strict?: boolean +export interface InferenceVoyageAIServiceSettings { + dimensions?: integer + model_id: string + rate_limit?: InferenceRateLimitSetting + embedding_type?: float } -export type InferenceChatCompletionUnifiedCompletionToolType = string | InferenceChatCompletionUnifiedCompletionToolChoice +export type InferenceVoyageAIServiceType = 'voyageai' -export interface InferenceChatCompletionUnifiedContentObject { - text: string - type: string +export interface InferenceVoyageAITaskSettings { + input_type?: string + return_documents?: boolean + top_k?: integer + truncation?: boolean } -export interface InferenceChatCompletionUnifiedMessage { - content?: InferenceChatCompletionUnifiedMessageContent - role: string - tool_call_id?: Id - tool_calls?: InferenceChatCompletionUnifiedToolCall[] +export type InferenceVoyageAITaskType = 'text_embedding' | 'rerank' + +export interface InferenceWatsonxServiceSettings { + api_key: string + api_version: string + model_id: string + project_id: string + rate_limit?: InferenceRateLimitSetting + url: string } -export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] +export type InferenceWatsonxServiceType = 'watsonxai' + +export type InferenceWatsonxTaskType = 'text_embedding' -export interface InferenceChatCompletionUnifiedRequest extends InferenceRequestChatCompletionBase { +export interface InferenceChatCompletionUnifiedRequest extends RequestBase { inference_id: Id timeout?: Duration + body?: InferenceRequestChatCompletionBase } export type InferenceChatCompletionUnifiedResponse = StreamResult -export interface InferenceChatCompletionUnifiedToolCall { - id: Id - function: InferenceChatCompletionUnifiedToolCallFunction - type: string -} - -export interface InferenceChatCompletionUnifiedToolCallFunction { - arguments: string - name: string -} - export interface InferenceCompletionRequest extends RequestBase { inference_id: Id timeout?: Duration @@ -13244,8 +13515,9 @@ export interface InferenceGetResponse { endpoints: InferenceInferenceEndpointInfo[] } -export interface InferencePostEisChatCompletionRequest extends InferenceRequestChatCompletionBase { +export interface InferencePostEisChatCompletionRequest extends RequestBase { eis_inference_id: Id + body?: InferenceRequestChatCompletionBase } export type InferencePostEisChatCompletionResponse = StreamResult @@ -13258,489 +13530,219 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo -export interface InferencePutAlibabacloudAlibabaCloudServiceSettings { - api_key: string - host: string - rate_limit?: InferenceRateLimitSetting - service_id: string - workspace: string -} - -export interface InferencePutAlibabacloudAlibabaCloudTaskSettings { - input_type?: string - return_token?: boolean -} - -export type InferencePutAlibabacloudAlibabaCloudTaskType = 'completion' | 'rerank' | 'space_embedding' | 'text_embedding' - export interface InferencePutAlibabacloudRequest extends RequestBase { - task_type: InferencePutAlibabacloudAlibabaCloudTaskType + task_type: InferenceAlibabaCloudTaskType alibabacloud_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutAlibabacloudServiceType - service_settings: InferencePutAlibabacloudAlibabaCloudServiceSettings - task_settings?: 
InferencePutAlibabacloudAlibabaCloudTaskSettings + service: InferenceAlibabaCloudServiceType + service_settings: InferenceAlibabaCloudServiceSettings + task_settings?: InferenceAlibabaCloudTaskSettings } } export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo -export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' - -export interface InferencePutAmazonbedrockAmazonBedrockServiceSettings { - access_key: string - model: string - provider?: string - region: string - rate_limit?: InferenceRateLimitSetting - secret_key: string -} - -export interface InferencePutAmazonbedrockAmazonBedrockTaskSettings { - max_new_tokens?: integer - temperature?: float - top_k?: float - top_p?: float -} - -export type InferencePutAmazonbedrockAmazonBedrockTaskType = 'completion' | 'text_embedding' - export interface InferencePutAmazonbedrockRequest extends RequestBase { - task_type: InferencePutAmazonbedrockAmazonBedrockTaskType + task_type: InferenceAmazonBedrockTaskType amazonbedrock_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutAmazonbedrockServiceType - service_settings: InferencePutAmazonbedrockAmazonBedrockServiceSettings - task_settings?: InferencePutAmazonbedrockAmazonBedrockTaskSettings + service: InferenceAmazonBedrockServiceType + service_settings: InferenceAmazonBedrockServiceSettings + task_settings?: InferenceAmazonBedrockTaskSettings } } export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo -export type InferencePutAmazonbedrockServiceType = 'amazonbedrock' - -export interface InferencePutAnthropicAnthropicServiceSettings { - api_key: string - model_id: string - rate_limit?: InferenceRateLimitSetting -} - -export interface InferencePutAnthropicAnthropicTaskSettings { - max_tokens: integer - temperature?: float - top_k?: integer - top_p?: float -} - -export type InferencePutAnthropicAnthropicTaskType = 'completion' - export interface InferencePutAnthropicRequest extends RequestBase { - task_type: InferencePutAnthropicAnthropicTaskType + task_type: InferenceAnthropicTaskType anthropic_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutAnthropicServiceType - service_settings: InferencePutAnthropicAnthropicServiceSettings - task_settings?: InferencePutAnthropicAnthropicTaskSettings + service: InferenceAnthropicServiceType + service_settings: InferenceAnthropicServiceSettings + task_settings?: InferenceAnthropicTaskSettings } } export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo -export type InferencePutAnthropicServiceType = 'anthropic' - -export interface InferencePutAzureaistudioAzureAiStudioServiceSettings { - api_key: string - endpoint_type: string - target: string - provider: string - rate_limit?: InferenceRateLimitSetting -} - -export interface InferencePutAzureaistudioAzureAiStudioTaskSettings { - do_sample?: float - max_new_tokens?: integer - temperature?: float - top_p?: float - user?: string -} - -export type InferencePutAzureaistudioAzureAiStudioTaskType = 'completion' | 'text_embedding' - export interface InferencePutAzureaistudioRequest extends RequestBase { - task_type: InferencePutAzureaistudioAzureAiStudioTaskType + task_type: InferenceAzureAiStudioTaskType azureaistudio_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutAzureaistudioServiceType - service_settings: InferencePutAzureaistudioAzureAiStudioServiceSettings - task_settings?: 
InferencePutAzureaistudioAzureAiStudioTaskSettings + service: InferenceAzureAiStudioServiceType + service_settings: InferenceAzureAiStudioServiceSettings + task_settings?: InferenceAzureAiStudioTaskSettings } } export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfo -export type InferencePutAzureaistudioServiceType = 'azureaistudio' - -export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { - api_key?: string - api_version: string - deployment_id: string - entra_id?: string - rate_limit?: InferenceRateLimitSetting - resource_name: string -} - -export interface InferencePutAzureopenaiAzureOpenAITaskSettings { - user?: string -} - -export type InferencePutAzureopenaiAzureOpenAITaskType = 'completion' | 'text_embedding' - export interface InferencePutAzureopenaiRequest extends RequestBase { - task_type: InferencePutAzureopenaiAzureOpenAITaskType + task_type: InferenceAzureOpenAITaskType azureopenai_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutAzureopenaiServiceType - service_settings: InferencePutAzureopenaiAzureOpenAIServiceSettings - task_settings?: InferencePutAzureopenaiAzureOpenAITaskSettings + service: InferenceAzureOpenAIServiceType + service_settings: InferenceAzureOpenAIServiceSettings + task_settings?: InferenceAzureOpenAITaskSettings } } export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo -export type InferencePutAzureopenaiServiceType = 'azureopenai' - -export interface InferencePutCohereCohereServiceSettings { - api_key: string - embedding_type?: InferencePutCohereEmbeddingType - model_id?: string - rate_limit?: InferenceRateLimitSetting - similarity?: InferencePutCohereSimilarityType -} - -export interface InferencePutCohereCohereTaskSettings { - input_type?: InferencePutCohereInputType - return_documents?: boolean - top_n?: integer - truncate?: InferencePutCohereTruncateType -} - -export type InferencePutCohereCohereTaskType = 'completion' | 'rerank' | 'text_embedding' - -export type InferencePutCohereEmbeddingType = 'byte' | 'float' | 'int8' - -export type InferencePutCohereInputType = 'classification' | 'clustering' | 'ingest' | 'search' - export interface InferencePutCohereRequest extends RequestBase { - task_type: InferencePutCohereCohereTaskType + task_type: InferenceCohereTaskType cohere_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutCohereServiceType - service_settings: InferencePutCohereCohereServiceSettings - task_settings?: InferencePutCohereCohereTaskSettings + service: InferenceCohereServiceType + service_settings: InferenceCohereServiceSettings + task_settings?: InferenceCohereTaskSettings } } export type InferencePutCohereResponse = InferenceInferenceEndpointInfo -export type InferencePutCohereServiceType = 'cohere' - -export type InferencePutCohereSimilarityType = 'cosine' | 'dot_product' | 'l2_norm' - -export type InferencePutCohereTruncateType = 'END' | 'NONE' | 'START' - -export interface InferencePutEisEisServiceSettings { - model_id: string - rate_limit?: InferenceRateLimitSetting -} - -export type InferencePutEisEisTaskType = 'chat_completion' - export interface InferencePutEisRequest extends RequestBase { - task_type: InferencePutEisEisTaskType + task_type: InferenceEisTaskType eis_inference_id: Id body?: { - service: InferencePutEisServiceType - service_settings: InferencePutEisEisServiceSettings + service: InferenceEisServiceType + service_settings: InferenceEisServiceSettings } } 
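The consumer-facing effect of this consolidation is easiest to see with a typed request literal. Below is a minimal sketch against the generated `types.ts` shown above; the relative import path, the endpoint ID, and the model ID are illustrative placeholders, not values taken from this diff:

```ts
// Hypothetical consumer of the generated typings from output/typescript/types.ts.
// The import path is an assumption for illustration only.
import type {
  InferenceEisServiceSettings,
  InferenceEisServiceType,
  InferenceEisTaskType,
} from './output/typescript/types'

// Mirrors the refactored InferencePutEisRequest shape: the old per-endpoint
// InferencePutEis* aliases are gone, and task_type, service, and
// service_settings now resolve to the shared inference._types (Inference*) names.
const putEisRequest: {
  task_type: InferenceEisTaskType // 'chat_completion'
  eis_inference_id: string // Id in the spec
  body?: {
    service: InferenceEisServiceType // 'elastic'
    service_settings: InferenceEisServiceSettings
  }
} = {
  task_type: 'chat_completion',
  eis_inference_id: 'my-eis-endpoint', // placeholder Id
  body: {
    service: 'elastic',
    service_settings: {
      model_id: 'my-model', // placeholder model ID
    },
  },
}
```

The same substitution pattern applies to every `InferencePut*Request` in this hunk: only the namespace of the referenced types changes, so request literals that never named the old `InferencePut*` aliases directly keep compiling unchanged.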
export type InferencePutEisResponse = InferenceInferenceEndpointInfo -export type InferencePutEisServiceType = 'elastic' - -export interface InferencePutElasticsearchElasticsearchServiceSettings { - adaptive_allocations?: InferenceAdaptiveAllocations - deployment_id?: string - model_id: string - num_allocations?: integer - num_threads: integer -} - -export interface InferencePutElasticsearchElasticsearchTaskSettings { - return_documents?: boolean -} - -export type InferencePutElasticsearchElasticsearchTaskType = 'rerank' | 'sparse_embedding' | 'text_embedding' - export interface InferencePutElasticsearchRequest extends RequestBase { - task_type: InferencePutElasticsearchElasticsearchTaskType + task_type: InferenceElasticsearchTaskType elasticsearch_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutElasticsearchServiceType - service_settings: InferencePutElasticsearchElasticsearchServiceSettings - task_settings?: InferencePutElasticsearchElasticsearchTaskSettings + service: InferenceElasticsearchServiceType + service_settings: InferenceElasticsearchServiceSettings + task_settings?: InferenceElasticsearchTaskSettings } } export type InferencePutElasticsearchResponse = InferenceInferenceEndpointInfo -export type InferencePutElasticsearchServiceType = 'elasticsearch' - -export interface InferencePutElserElserServiceSettings { - adaptive_allocations?: InferenceAdaptiveAllocations - num_allocations: integer - num_threads: integer -} - -export type InferencePutElserElserTaskType = 'sparse_embedding' - export interface InferencePutElserRequest extends RequestBase { - task_type: InferencePutElserElserTaskType + task_type: InferenceElserTaskType elser_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutElserServiceType - service_settings: InferencePutElserElserServiceSettings + service: InferenceElserServiceType + service_settings: InferenceElserServiceSettings } } export type InferencePutElserResponse = InferenceInferenceEndpointInfo -export type InferencePutElserServiceType = 'elser' - -export interface InferencePutGoogleaistudioGoogleAiStudioServiceSettings { - api_key: string - model_id: string - rate_limit?: InferenceRateLimitSetting -} - -export type InferencePutGoogleaistudioGoogleAiStudioTaskType = 'completion' | 'text_embedding' - export interface InferencePutGoogleaistudioRequest extends RequestBase { - task_type: InferencePutGoogleaistudioGoogleAiStudioTaskType + task_type: InferenceGoogleAiStudioTaskType googleaistudio_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutGoogleaistudioServiceType - service_settings: InferencePutGoogleaistudioGoogleAiStudioServiceSettings + service: InferenceGoogleAiServiceType + service_settings: InferenceGoogleAiStudioServiceSettings } } export type InferencePutGoogleaistudioResponse = InferenceInferenceEndpointInfo -export type InferencePutGoogleaistudioServiceType = 'googleaistudio' - -export interface InferencePutGooglevertexaiGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string - rate_limit?: InferenceRateLimitSetting - service_account_json: string -} - -export interface InferencePutGooglevertexaiGoogleVertexAITaskSettings { - auto_truncate?: boolean - top_n?: integer -} - -export type InferencePutGooglevertexaiGoogleVertexAITaskType = 'rerank' | 'text_embedding' - export interface InferencePutGooglevertexaiRequest extends RequestBase { - task_type: 
InferencePutGooglevertexaiGoogleVertexAITaskType + task_type: InferenceGoogleVertexAITaskType googlevertexai_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutGooglevertexaiServiceType - service_settings: InferencePutGooglevertexaiGoogleVertexAIServiceSettings - task_settings?: InferencePutGooglevertexaiGoogleVertexAITaskSettings + service: InferenceGoogleVertexAIServiceType + service_settings: InferenceGoogleVertexAIServiceSettings + task_settings?: InferenceGoogleVertexAITaskSettings } } export type InferencePutGooglevertexaiResponse = InferenceInferenceEndpointInfo -export type InferencePutGooglevertexaiServiceType = 'googlevertexai' - -export interface InferencePutHuggingFaceHuggingFaceServiceSettings { - api_key: string - rate_limit?: InferenceRateLimitSetting - url: string -} - -export type InferencePutHuggingFaceHuggingFaceTaskType = 'text_embedding' - export interface InferencePutHuggingFaceRequest extends RequestBase { - task_type: InferencePutHuggingFaceHuggingFaceTaskType + task_type: InferenceHuggingFaceTaskType huggingface_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutHuggingFaceServiceType - service_settings: InferencePutHuggingFaceHuggingFaceServiceSettings + service: InferenceHuggingFaceServiceType + service_settings: InferenceHuggingFaceServiceSettings } } export type InferencePutHuggingFaceResponse = InferenceInferenceEndpointInfo -export type InferencePutHuggingFaceServiceType = 'hugging_face' - -export interface InferencePutJinaaiJinaAIServiceSettings { - api_key: string - model_id?: string - rate_limit?: InferenceRateLimitSetting - similarity?: InferencePutJinaaiSimilarityType -} - -export interface InferencePutJinaaiJinaAITaskSettings { - return_documents?: boolean - task?: InferencePutJinaaiTextEmbeddingTask - top_n?: integer -} - -export type InferencePutJinaaiJinaAITaskType = 'rerank' | 'text_embedding' - export interface InferencePutJinaaiRequest extends RequestBase { - task_type: InferencePutJinaaiJinaAITaskType + task_type: InferenceJinaAITaskType jinaai_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutJinaaiServiceType - service_settings: InferencePutJinaaiJinaAIServiceSettings - task_settings?: InferencePutJinaaiJinaAITaskSettings + service: InferenceJinaAIServiceType + service_settings: InferenceJinaAIServiceSettings + task_settings?: InferenceJinaAITaskSettings } } export type InferencePutJinaaiResponse = InferenceInferenceEndpointInfo -export type InferencePutJinaaiServiceType = 'jinaai' - -export type InferencePutJinaaiSimilarityType = 'cosine' | 'dot_product' | 'l2_norm' - -export type InferencePutJinaaiTextEmbeddingTask = 'classification' | 'clustering' | 'ingest' | 'search' - -export interface InferencePutMistralMistralServiceSettings { - api_key: string - max_input_tokens?: integer - model: string - rate_limit?: InferenceRateLimitSetting -} - -export type InferencePutMistralMistralTaskType = 'text_embedding' - export interface InferencePutMistralRequest extends RequestBase { - task_type: InferencePutMistralMistralTaskType + task_type: InferenceMistralTaskType mistral_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutMistralServiceType - service_settings: InferencePutMistralMistralServiceSettings + service: InferenceMistralServiceType + service_settings: InferenceMistralServiceSettings } } export type InferencePutMistralResponse = 
InferenceInferenceEndpointInfo -export type InferencePutMistralServiceType = 'mistral' - -export interface InferencePutOpenaiOpenAIServiceSettings { - api_key: string - dimensions?: integer - model_id: string - organization_id?: string - rate_limit?: InferenceRateLimitSetting - url?: string -} - -export interface InferencePutOpenaiOpenAITaskSettings { - user?: string -} - -export type InferencePutOpenaiOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding' - export interface InferencePutOpenaiRequest extends RequestBase { - task_type: InferencePutOpenaiOpenAITaskType + task_type: InferenceOpenAITaskType openai_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutOpenaiServiceType - service_settings: InferencePutOpenaiOpenAIServiceSettings - task_settings?: InferencePutOpenaiOpenAITaskSettings + service: InferenceOpenAIServiceType + service_settings: InferenceOpenAIServiceSettings + task_settings?: InferenceOpenAITaskSettings } } export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfo -export type InferencePutOpenaiServiceType = 'openai' - export interface InferencePutVoyageaiRequest extends RequestBase { - task_type: InferencePutVoyageaiVoyageAITaskType + task_type: InferenceVoyageAITaskType voyageai_inference_id: Id body?: { chunking_settings?: InferenceInferenceChunkingSettings - service: InferencePutVoyageaiServiceType - service_settings: InferencePutVoyageaiVoyageAIServiceSettings - task_settings?: InferencePutVoyageaiVoyageAITaskSettings + service: InferenceVoyageAIServiceType + service_settings: InferenceVoyageAIServiceSettings + task_settings?: InferenceVoyageAITaskSettings } } export type InferencePutVoyageaiResponse = InferenceInferenceEndpointInfo -export type InferencePutVoyageaiServiceType = 'voyageai' - -export interface InferencePutVoyageaiVoyageAIServiceSettings { - dimensions?: integer - model_id: string - rate_limit?: InferenceRateLimitSetting - embedding_type?: float -} - -export interface InferencePutVoyageaiVoyageAITaskSettings { - input_type?: string - return_documents?: boolean - top_k?: integer - truncation?: boolean -} - -export type InferencePutVoyageaiVoyageAITaskType = 'text_embedding' | 'rerank' - export interface InferencePutWatsonxRequest extends RequestBase { - task_type: InferencePutWatsonxWatsonxTaskType + task_type: InferenceWatsonxTaskType watsonx_inference_id: Id body?: { - service: InferencePutWatsonxServiceType - service_settings: InferencePutWatsonxWatsonxServiceSettings + service: InferenceWatsonxServiceType + service_settings: InferenceWatsonxServiceSettings } } export type InferencePutWatsonxResponse = InferenceInferenceEndpointInfo -export type InferencePutWatsonxServiceType = 'watsonxai' - -export interface InferencePutWatsonxWatsonxServiceSettings { - api_key: string - api_version: string - model_id: string - project_id: string - rate_limit?: InferenceRateLimitSetting - url: string -} - -export type InferencePutWatsonxWatsonxTaskType = 'text_embedding' - export interface InferenceRerankRequest extends RequestBase { inference_id: Id timeout?: Duration diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 16250c1159..4ab5ce909b 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -17,15 +17,13 @@ * under the License. 
*/ -import { - CompletionTool, - CompletionToolType, - Message -} from '@inference/chat_completion_unified/UnifiedRequest' +import { RateLimitSetting } from '@inference/_types/Services' +import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' import { float, integer, long } from '@_types/Numeric' -export interface RequestChatCompletionBase extends RequestBase { +export class RequestChatCompletionBase extends RequestBase { /** * A list of objects representing the conversation. */ @@ -78,3 +76,1114 @@ */ min_number_of_allocations?: integer } + +/** + * @codegen_names string, object + */ +export type CompletionToolType = string | CompletionToolChoice + +/** + * An object style representation of a single portion of a conversation. + */ +export interface ContentObject { + /** + * The text content. + */ + text: string + /** + * The type of content. + */ + type: string +} + +/** + * The function that the model called. + */ +export interface ToolCallFunction { + /** + * The arguments to call the function with in JSON format. + */ + arguments: string + /** + * The name of the function to call. + */ + name: string +} + +/** + * A tool call generated by the model. + */ +export interface ToolCall { + /** + * The identifier of the tool call. + */ + id: Id + /** + * The function that the model called. + */ + function: ToolCallFunction + /** + * The type of the tool call. + */ + type: string +} + +/** + * @codegen_names string, object + */ +export type MessageContent = string | Array<ContentObject> + +/** + * An object representing part of the conversation. + */ +export interface Message { + /** + * The content of the message. + */ + content?: MessageContent + /** + * The role of the message author. + */ + role: string + /** + * The tool call that this message is responding to. + */ + tool_call_id?: Id + /** + * The tool calls generated by the model. + */ + tool_calls?: Array<ToolCall> +} + +/** + * The tool choice function. + * + */ +export interface CompletionToolChoiceFunction { + /** + * The name of the function to call. + */ + name: string +} + +/** + * Controls which tool is called by the model. + */ +export interface CompletionToolChoice { + /** + * The type of the tool. + */ + type: string + /** + * The tool choice function. + */ + function: CompletionToolChoiceFunction +} + +/** + * The completion tool function definition. + */ +export interface CompletionToolFunction { + /** + * A description of what the function does. + * This is used by the model to choose when and how to call the function. + */ + description?: string + /** + * The name of the function. + */ + name: string + /** + * The parameters the function accepts. This should be formatted as a JSON object. + */ + parameters?: UserDefinedValue + /** + * Whether to enable schema adherence when generating the function call. + */ + strict?: boolean +} + +/** + * A tool that the model can call. + */ +export interface CompletionTool { + /** + * The type of tool. + */ + type: string + /** + * The function definition. + */ + function: CompletionToolFunction +} + +export class AlibabaCloudServiceSettings { + /** + * A valid API key for the AlibabaCloud AI Search API. + */ + api_key: string + /** + * The name of the host address used for the inference task. + * You can find the host address in the API keys section of the documentation. 
+ * @ext_doc_id alibabacloud-api-keys + */ + host: string + /** + * This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. + * By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. + */ + rate_limit?: RateLimitSetting + /** + * The name of the model service to use for the inference task. + * The following service IDs are available for the `completion` task: + * + * * `ops-qwen-turbo` + * * `qwen-turbo` + * * `qwen-plus` + * * `qwen-max ÷ qwen-max-longcontext` + * + * The following service ID is available for the `rerank` task: + * + * * `ops-bge-reranker-larger` + * + * The following service ID is available for the `sparse_embedding` task: + * + * * `ops-text-sparse-embedding-001` + * + * The following service IDs are available for the `text_embedding` task: + * + * * `ops-text-embedding-001` + * * `ops-text-embedding-zh-001` + * * `ops-text-embedding-en-001` + * * `ops-text-embedding-002` + */ + service_id: string + /** + * The name of the workspace used for the inference task. + */ + workspace: string +} + +export class AlibabaCloudTaskSettings { + /** + * For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model. + * Valid values are: + * + * * `ingest` for storing document embeddings in a vector database. + * * `search` for storing embeddings of search queries run against a vector database to find relevant documents. + */ + input_type?: string + /** + * For a `sparse_embedding` task, it affects whether the token name will be returned in the response. + * It defaults to `false`, which means only the token ID will be returned in the response. + */ + return_token?: boolean +} + +export enum AlibabaCloudTaskType { + completion, + rerank, + space_embedding, + text_embedding +} + +export enum AlibabaCloudServiceType { + 'alibabacloud-ai-search' +} + +export class AmazonBedrockServiceSettings { + /** + * A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests. + */ + access_key: string + /** + * The base model ID or an ARN to a custom model based on a foundational model. + * The base model IDs can be found in the Amazon Bedrock documentation. + * Note that the model ID must be available for the provider chosen and your IAM user must have access to the model. + * @ext_doc_id amazonbedrock-models + */ + model: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `amazontitan` - available for `text_embedding` and `completion` task types + * * `anthropic` - available for `completion` task type only + * * `ai21labs` - available for `completion` task type only + * * `cohere` - available for `text_embedding` and `completion` task types + * * `meta` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + */ + provider?: string + /** + * The region that your model or ARN is deployed in. + * The list of available regions per model can be found in the Amazon Bedrock documentation. + * @ext_doc_id amazonbedrock-models + */ + region: string + /** + * This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock. + * By default, the `amazonbedrock` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting + /** + * A valid AWS secret key that is paired with the `access_key`. 
+ * For information about creating and managing access and secret keys, refer to the AWS documentation. + * @ext_doc_id amazonbedrock-secret-keys + */ + secret_key: string +} + +export class AmazonBedrockTaskSettings { + /** + * For a `completion` task, it sets the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results. + * At temperature 0.0 the model is most deterministic, at temperature 1.0 most random. + * It should not be used if `top_p` or `top_k` is specified. + */ + temperature?: float + /** + * For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability. + * It is only available for anthropic, cohere, and mistral providers. + * It is an alternative to `temperature`; it should not be used if `temperature` is specified. + */ + top_k?: float + /** + * For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens. + * Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence. + * It is an alternative to `temperature`; it should not be used if `temperature` is specified. + */ + top_p?: float +} + +export enum AmazonBedrockTaskType { + completion, + text_embedding +} + +export enum AmazonBedrockServiceType { + amazonbedrock +} + +export class AnthropicServiceSettings { + /** + * A valid API key for the Anthropic API. + */ + api_key: string + /** + * The name of the model to use for the inference task. + * Refer to the Anthropic documentation for the list of supported models. + * @ext_doc_id anothropic-models + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Anthropic. + * By default, the `anthropic` service sets the number of requests allowed per minute to 50. + */ + rate_limit?: RateLimitSetting +} + +export class AnthropicTaskSettings { + /** + * For a `completion` task, it is the maximum number of tokens to generate before stopping. + */ + max_tokens: integer + /** + * For a `completion` task, it is the amount of randomness injected into the response. + * For more details about the supported range, refer to Anthropic documentation. + * @ext_doc_id anthropic-messages + */ + temperature?: float + /** + * For a `completion` task, it specifies to only sample from the top K options for each subsequent token. + * It is recommended for advanced use cases only. + * You usually only need to use `temperature`. + */ + top_k?: integer + /** + * For a `completion` task, it specifies to use Anthropic's nucleus sampling. + * In nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability. + * You should either alter `temperature` or `top_p`, but not both. + * It is recommended for advanced use cases only. + * You usually only need to use `temperature`. + */ + top_p?: float +} + +export enum AnthropicTaskType { + completion +} + +export enum AnthropicServiceType { + anthropic +} + +export class AzureAiStudioServiceSettings { + /** + * A valid API key of your Azure AI Studio model deployment. + * This key can be found on the overview page for your deployment in the management section of your Azure AI Studio account. 
+ * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureaistudio-api-keys + */ + api_key: string + /** + * The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`. + * The `token` endpoint type is for "pay as you go" endpoints that are billed per token. + * The `realtime` endpoint type is for "real-time" endpoints that are billed per hour of usage. + * @ext_doc_id azureaistudio-endpoint-types + */ + endpoint_type: string + /** + * The target URL of your Azure AI Studio model deployment. + * This can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + */ + target: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `cohere` - available for `text_embedding` and `completion` task types + * * `databricks` - available for `completion` task type only + * * `meta` - available for `completion` task type only + * * `microsoft_phi` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + * * `openai` - available for `text_embedding` and `completion` task types + */ + provider: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure AI Studio. + * By default, the `azureaistudio` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} + +export class AzureAiStudioTaskSettings { + /** + * For a `completion` task, instruct the inference process to perform sampling. + * It has no effect unless `temperature` or `top_p` is specified. + */ + do_sample?: float + /** + * For a `completion` task, provide a hint for the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, control the apparent creativity of generated completions with a sampling temperature. + * It must be a number in the range of 0.0 to 2.0. + * It should not be used if `top_p` is specified. + */ + temperature?: float + /** + * For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability. + * It is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0. + * It should not be used if `temperature` is specified. + */ + top_p?: float + /** + * For a `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} + +export enum AzureAiStudioTaskType { + completion, + text_embedding +} + +export enum AzureAiStudioServiceType { + azureaistudio +} + +export class AzureOpenAIServiceSettings { + /** + * A valid API key for your Azure OpenAI account. + * You must specify either `api_key` or `entra_id`. + * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. 
+ * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureopenai-auth + */ + api_key?: string + /** + * The Azure API version ID to use. + * It is recommended to use the latest supported non-preview version. + */ + api_version: string + /** + * The deployment name of your deployed models. + * Your Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription. + * @ext_doc_id azureopenai + */ + deployment_id: string + /** + * A valid Microsoft Entra token. + * You must specify either `api_key` or `entra_id`. + * If you do not provide either or you provide both, you will receive an error when you try to create your model. + * @ext_doc_id azureopenai-auth + */ + entra_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure. + * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type. + * For `text_embedding`, it is set to `1440`. + * For `completion`, it is set to `120`. + * @ext_doc_id azureopenai-quota-limits + */ + rate_limit?: RateLimitSetting + /** + * The name of your Azure OpenAI resource. + * You can find this from the list of resources in the Azure Portal for your subscription. + * @ext_doc_id azureopenai-portal + */ + resource_name: string +} + +export class AzureOpenAITaskSettings { + /** + * For a `completion` or `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} + +export enum AzureOpenAITaskType { + completion, + text_embedding +} + +export enum AzureOpenAIServiceType { + azureopenai +}
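As an illustration of the `api_key`/`entra_id` exclusivity described above, a hedged sketch of an `azureopenai` request body (the resource, deployment, and version values are invented; check the Azure documentation for real ones):

const azureOpenAiBody = {
  service: 'azureopenai', // AzureOpenAIServiceType
  service_settings: {
    api_key: '<api-key>', // placeholder; provide api_key OR entra_id, never both
    resource_name: '<resource-name>', // placeholder
    deployment_id: '<deployment-name>', // placeholder
    api_version: '2024-02-01' // assumed example version, use the latest non-preview version
  },
  task_settings: {
    user: 'user-1234' // optional; used for abuse detection
  }
}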
+ +export class CohereServiceSettings { + /** + * A valid API key for your Cohere account. + * You can find or create your Cohere API keys on the Cohere API key settings page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id cohere-api-keys + */ + api_key: string + /** + * For a `text_embedding` task, the types of embeddings you want to get back. + * Use `byte` for signed int8 embeddings (this is a synonym of `int8`). + * Use `float` for the default float embeddings. + * Use `int8` for signed int8 embeddings. + * @server_default float + */ + embedding_type?: CohereEmbeddingType + /** + * For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task. + * + * * For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command). + * * For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1). + * * For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed). + * + * The default value for a text embedding task is `embed-english-v2.0`. + */ + model_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from Cohere. + * By default, the `cohere` service sets the number of requests allowed per minute to 10000. + */ + rate_limit?: RateLimitSetting + /** + * The similarity measure. + * If the `embedding_type` is `float`, the default value is `dot_product`. + * If the `embedding_type` is `int8` or `byte`, the default value is `cosine`. + */ + similarity?: CohereSimilarityType +} + +export enum CohereTaskType { + completion, + rerank, + text_embedding +} + +export enum CohereServiceType { + cohere +} + +export enum CohereEmbeddingType { + byte, + float, + int8 +} + +export enum CohereInputType { + classification, + clustering, + ingest, + search +} + +export enum CohereSimilarityType { + cosine, + dot_product, + l2_norm +} + +export enum CohereTruncateType { + END, + NONE, + START +} + +export class CohereTaskSettings { + /** + * For a `text_embedding` task, the type of input passed to the model. + * Valid values are: + * + * * `classification`: Use it for embeddings passed through a text classifier. + * * `clustering`: Use it for the embeddings run through a clustering algorithm. + * * `ingest`: Use it for storing document embeddings in a vector database. + * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. + * + * IMPORTANT: The `input_type` field is required when using embedding models `v3` and higher. + */ + input_type?: CohereInputType + /** + * For a `rerank` task, return doc text within the results. + */ + return_documents?: boolean + /** + * For a `rerank` task, the number of most relevant documents to return. + * It defaults to the number of documents. + * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. + */ + top_n?: integer + /** + * For a `text_embedding` task, the method to handle inputs longer than the maximum token length. + * Valid values are: + * + * * `END`: When the input exceeds the maximum input token length, the end of the input is discarded. + * * `NONE`: When the input exceeds the maximum input token length, an error is returned. + * * `START`: When the input exceeds the maximum input token length, the start of the input is discarded. + */ + truncate?: CohereTruncateType +} + +export class EisServiceSettings { + /** + * The name of the model to use for the inference task. + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned. + * By default, the `elastic` service sets the number of requests allowed per minute to `240` for `chat_completion`. + */ + rate_limit?: RateLimitSetting +} + +export enum EisTaskType { + chat_completion +} + +export enum EisServiceType { + elastic +}
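A hedged sketch of a `cohere` rerank configuration built from the classes above (the key is a placeholder and the model name is an assumption; check the Cohere rerank docs); note how `top_n` interacts with `rank_window_size` when the endpoint backs a `text_similarity_reranker` retriever:

const cohereRerankBody = {
  service: 'cohere', // CohereServiceType
  service_settings: {
    api_key: '<api-key>', // placeholder
    model_id: 'rerank-english-v3.0' // assumed example rerank model
  },
  task_settings: {
    return_documents: true, // include the doc text in the results
    top_n: 10 // must be >= rank_window_size in a text_similarity_reranker query
  }
}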
+ +export class ElasticsearchServiceSettings { + /** + * Adaptive allocations configuration details. + * If `enabled` is true, the number of allocations of the model is set based on the current load the process gets. + * When the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set. + * When the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set. + * If `enabled` is true, do not set the number of allocations manually. + */ + adaptive_allocations?: AdaptiveAllocations + /** + * The deployment identifier for a trained model deployment. + * When `deployment_id` is used, the `model_id` is optional. + */ + deployment_id?: string + /** + * The name of the model to use for the inference task. + * It can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client. + * @ext_doc_id eland-import + */ + model_id: string + /** + * The total number of allocations that are assigned to the model across machine learning nodes. + * Increasing this value generally increases the throughput. + * If adaptive allocations are enabled, do not set this value because it's automatically set. + */ + num_allocations?: integer + /** + * The number of threads used by each model allocation during inference. + * Increasing this value generally increases the speed per inference request. + * The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. + * The value must be a power of 2. + * The maximum value is 32. + */ + num_threads: integer +} + +export class ElasticsearchTaskSettings { + /** + * For a `rerank` task, return the document instead of only the index. + * @server_default true + */ + return_documents?: boolean +} + +export enum ElasticsearchTaskType { + rerank, + sparse_embedding, + text_embedding +} + +export enum ElasticsearchServiceType { + elasticsearch +} + +export class ElserServiceSettings { + /** + * Adaptive allocations configuration details. + * If `enabled` is true, the number of allocations of the model is set based on the current load the process gets. + * When the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set. + * When the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set. + * If `enabled` is true, do not set the number of allocations manually. + */ + adaptive_allocations?: AdaptiveAllocations + /** + * The total number of allocations this model is assigned across machine learning nodes. + * Increasing this value generally increases the throughput. + * If adaptive allocations are enabled, do not set this value because it's automatically set. + */ + num_allocations: integer + /** + * The number of threads used by each model allocation during inference. + * Increasing this value generally increases the speed per inference request. + * The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. + * The value must be a power of 2. + * The maximum value is 32. + * + * > info + * > If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1. + */ + num_threads: integer +} + +export enum ElserTaskType { + sparse_embedding +} + +export enum ElserServiceType { + elser +}
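The adaptive-allocations contract above (either a fixed number of allocations or `adaptive_allocations`, not both) might look like this for the built-in E5 model; a hedged sketch with an invented allocation range:

const e5Body = {
  service: 'elasticsearch', // ElasticsearchServiceType
  service_settings: {
    model_id: '.multilingual-e5-small', // built-in model, per the model_id notes above
    adaptive_allocations: {
      enabled: true, // allocations now scale with load...
      min_number_of_allocations: 1, // ...within these bounds
      max_number_of_allocations: 4
    },
    // num_allocations is deliberately omitted: do not set it when adaptive allocations are enabled
    num_threads: 2 // a power of 2, at most 32
  }
}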
+ +export class GoogleAiStudioServiceSettings { + /** + * A valid API key of your Google Gemini account. + */ + api_key: string + /** + * The name of the model to use for the inference task. + * Refer to the Google documentation for the list of supported models. + * @ext_doc_id googleaistudio-models + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Google AI Studio. + * By default, the `googleaistudio` service sets the number of requests allowed per minute to 360. + */ + rate_limit?: RateLimitSetting +} + +export enum GoogleAiStudioTaskType { + completion, + text_embedding +} + +export enum GoogleAiServiceType { + googleaistudio +} + +export class GoogleVertexAIServiceSettings { + /** + * The name of the location to use for the inference task. + * Refer to the Google documentation for the list of supported locations. + * @ext_doc_id googlevertexai-locations + */ + location: string + /** + * The name of the model to use for the inference task. + * Refer to the Google documentation for the list of supported models. + * @ext_doc_id googlevertexai-models + */ + model_id: string + /** + * The name of the project to use for the inference task. + */ + project_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. + * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30,000. + */ + rate_limit?: RateLimitSetting + /** + * A valid service account in JSON format for the Google Vertex AI API. + */ + service_account_json: string +} + +export class GoogleVertexAITaskSettings { + /** + * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. + */ + auto_truncate?: boolean + /** + * For a `rerank` task, the number of the top N documents that should be returned. + */ + top_n?: integer +} + +export enum GoogleVertexAITaskType { + rerank, + text_embedding +} + +export enum GoogleVertexAIServiceType { + googlevertexai +} + +export class HuggingFaceServiceSettings { + /** + * A valid access token for your HuggingFace account. + * You can create or find your access tokens on the HuggingFace settings page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id huggingface-tokens + */ + api_key: string + /** + * This setting helps to minimize the number of rate limit errors returned from Hugging Face. + * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. + */ + rate_limit?: RateLimitSetting + /** + * The URL endpoint to use for the requests. + */ + url: string +} + +export enum HuggingFaceTaskType { + text_embedding +} + +export enum HuggingFaceServiceType { + hugging_face +}
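A hedged sketch of a `googlevertexai` configuration using the fields above (the location and model names are assumptions chosen as typical examples; the rest are placeholders):

const googleVertexAiBody = {
  service: 'googlevertexai', // GoogleVertexAIServiceType
  service_settings: {
    service_account_json: '<service-account-json>', // placeholder JSON credentials
    location: 'us-central1', // assumed example location, check the Google docs
    project_id: '<project-id>', // placeholder
    model_id: 'text-embedding-004' // assumed example model, check the Google docs
  },
  task_settings: {
    auto_truncate: true // truncate over-long inputs automatically
  }
}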
+ +export class JinaAIServiceSettings { + /** + * A valid API key of your JinaAI account. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id jinaAi-embeddings + */ + api_key: string + /** + * The name of the model to use for the inference task. + * For a `rerank` task, it is required. + * For a `text_embedding` task, it is optional. + */ + model_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from JinaAI. + * By default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types. + * @ext_doc_id jinaAi-rate-limit + */ + rate_limit?: RateLimitSetting + /** + * For a `text_embedding` task, the similarity measure. One of `cosine`, `dot_product`, or `l2_norm`. + * The default value varies with the embedding type. + * For example, a float embedding type uses a `dot_product` similarity measure by default. + */ + similarity?: JinaAISimilarityType +} + +export class JinaAITaskSettings { + /** + * For a `rerank` task, return the doc text within the results. + */ + return_documents?: boolean + /** + * For a `text_embedding` task, the task passed to the model. + * Valid values are: + * + * * `classification`: Use it for embeddings passed through a text classifier. + * * `clustering`: Use it for the embeddings run through a clustering algorithm. + * * `ingest`: Use it for storing document embeddings in a vector database. + * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. + */ + task?: JinaAITextEmbeddingTask + /** + * For a `rerank` task, the number of most relevant documents to return. + * It defaults to the number of documents. + * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. + */ + top_n?: integer +} + +export enum JinaAITaskType { + rerank, + text_embedding +} + +export enum JinaAIServiceType { + jinaai +} + +export enum JinaAISimilarityType { + cosine, + dot_product, + l2_norm +} + +export enum JinaAITextEmbeddingTask { + classification, + clustering, + ingest, + search +} + +export class MistralServiceSettings { + /** + * A valid API key of your Mistral account. + * You can find or create your Mistral API keys on the API Keys page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id mistral-api-keys + */ + api_key: string + /** + * The maximum number of tokens per input before chunking occurs. + */ + max_input_tokens?: integer + /** + * The name of the model to use for the inference task. + * Refer to the Mistral models documentation for the list of available text embedding models. + * @ext_doc_id mistral-api-models + */ + model: string + /** + * This setting helps to minimize the number of rate limit errors returned from the Mistral API. + * By default, the `mistral` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} + +export enum MistralTaskType { + text_embedding +} + +export enum MistralServiceType { + mistral +}
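To show how the optional `model_id` and the `task` setting combine, a hedged sketch of a `jinaai` text embedding body (the key is a placeholder and the model name is an assumption):

const jinaAiBody = {
  service: 'jinaai', // JinaAIServiceType
  service_settings: {
    api_key: '<api-key>', // placeholder
    model_id: 'jina-embeddings-v3' // assumed example model; optional for text_embedding
  },
  task_settings: {
    task: 'ingest' // JinaAITextEmbeddingTask: storing document embeddings
  }
}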
+ +export class OpenAIServiceSettings { + /** + * A valid API key of your OpenAI account. + * You can find your OpenAI API keys in your OpenAI account under the API keys section. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id openai-api-keys + */ + api_key: string + /** + * The number of dimensions the resulting output embeddings should have. + * It is supported only in `text-embedding-3` and later models. + * If it is not set, the OpenAI defined default for the model is used. + */ + dimensions?: integer + /** + * The name of the model to use for the inference task. + * Refer to the OpenAI documentation for the list of available text embedding models. + * @ext_doc_id openai-models + */ + model_id: string + /** + * The unique identifier for your organization. + * You can find the Organization ID in your OpenAI account under *Settings > Organizations*. + */ + organization_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from OpenAI. + * The `openai` service sets a default number of requests allowed per minute depending on the task type. + * For `text_embedding`, it is set to `3000`. + * For `completion`, it is set to `500`. + */ + rate_limit?: RateLimitSetting + /** + * The URL endpoint to use for the requests. + * It can be changed for testing purposes. + * @server_default https://api.openai.com/v1/embeddings + */ + url?: string +} + +export class OpenAITaskSettings { + /** + * For a `completion` or `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. + */ + user?: string +} + +export enum OpenAITaskType { + chat_completion, + completion, + text_embedding +} + +export enum OpenAIServiceType { + openai +}
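A hedged sketch tying the `dimensions` note above to a concrete `openai` body (the key is a placeholder; the model name is an assumption for a model that accepts `dimensions`):

const openAiBody = {
  service: 'openai', // OpenAIServiceType
  service_settings: {
    api_key: '<api-key>', // placeholder
    model_id: 'text-embedding-3-small', // assumed example; dimensions needs text-embedding-3 or later
    dimensions: 512 // otherwise the OpenAI-defined default for the model is used
  },
  task_settings: {
    user: 'user-1234' // optional; used for abuse detection
  }
}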
+ +export class VoyageAIServiceSettings { + /** + * The number of dimensions for resulting output embeddings. + * This setting maps to `output_dimension` in the VoyageAI documentation. + * Only for the `text_embedding` task type. + * @ext_doc_id voyageai-embeddings + */ + dimensions?: integer + /** + * The name of the model to use for the inference task. + * Refer to the VoyageAI documentation for the list of available text embedding and rerank models. + * @ext_doc_id voyageai-embeddings + * @ext_doc_id voyageai-rerank + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from VoyageAI. + * The `voyageai` service sets a default number of requests allowed per minute depending on the task type. + * For both `text_embedding` and `rerank`, it is set to `2000`. + */ + rate_limit?: RateLimitSetting + /** + * The data type for the embeddings to be returned. + * This setting maps to `output_dtype` in the VoyageAI documentation. + * Permitted values: `float`, `int8`, `bit`. + * `int8` is a synonym of `byte` in the VoyageAI documentation. + * `bit` is a synonym of `binary` in the VoyageAI documentation. + * Only for the `text_embedding` task type. + * @ext_doc_id voyageai-embeddings + */ + embedding_type?: float +} + +export class VoyageAITaskSettings { + /** + * Type of the input text. + * Permitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation). + * Only for the `text_embedding` task type. + */ + input_type?: string + /** + * Whether to return the source documents in the response. + * Only for the `rerank` task type. + * @server_default false + */ + return_documents?: boolean + /** + * The number of most relevant documents to return. + * If not specified, the reranking results of all documents will be returned. + * Only for the `rerank` task type. + */ + top_k?: integer + /** + * Whether to truncate the input texts to fit within the context length. + * @server_default true + */ + truncation?: boolean +} + +export enum VoyageAITaskType { + text_embedding, + rerank +} + +export enum VoyageAIServiceType { + voyageai +} + +export class WatsonxServiceSettings { + /** + * A valid API key of your Watsonx account. + * You can find or create your Watsonx API keys on the API keys page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id watsonx-api-keys + */ + api_key: string + /** + * A version parameter that takes a version date in the format of `YYYY-MM-DD`. + * For the active version date parameters, refer to the Watsonx documentation. + * @ext_doc_id watsonx-api-version + */ + api_version: string + /** + * The name of the model to use for the inference task. + * Refer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models. + * @ext_doc_id watsonx-api-models + */ + model_id: string + /** + * The identifier of the IBM Cloud project to use for the inference task. + */ + project_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Watsonx. + * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. + */ + rate_limit?: RateLimitSetting + /** + * The URL of the inference endpoint that you created on Watsonx. + */ + url: string +} + +export enum WatsonxTaskType { + text_embedding +} + +export enum WatsonxServiceType { + watsonxai +}
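A hedged sketch of a `watsonxai` body showing the `YYYY-MM-DD` version parameter in context (all other values are placeholders or assumptions):

const watsonxBody = {
  service: 'watsonxai', // WatsonxServiceType
  service_settings: {
    api_key: '<api-key>', // placeholder
    api_version: '2024-03-14', // a version date in YYYY-MM-DD format, as described above
    model_id: 'ibm/slate-30m-english-rtrvr', // assumed example model, check the Watsonx docs
    project_id: '<project-id>', // placeholder
    url: '<watsonx-endpoint-url>' // placeholder
  }
}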
diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 1932021931..a77f8e68c6 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -18,10 +18,9 @@ */ import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' -import { UserDefinedValue } from '@spec_utils/UserDefinedValue' +import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' import { Duration } from '@_types/Time' - /** * Perform chat completion inference * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public * @doc_id inference-api-chat-completion */ -export interface Request extends RequestChatCompletionBase { +export interface Request extends RequestBase { urls: [ { path: '/_inference/chat_completion/{inference_id}/_stream' methods: ['POST'] } ] path_parts: { /** * The inference Id */ inference_id: Id } query_parameters: { /** * Specifies the amount of time to wait for the inference request to complete. * @server_default 30s */ timeout?: Duration } -} - -/** - * @codegen_names string, object - */ -export type CompletionToolType = string | CompletionToolChoice - -/** - * An object style representation of a single portion of a conversation. - */ -export interface ContentObject { - /** - * The text content. - */ - text: string - /** - * The type of content. - */ - type: string -} - -/** - * The function that the model called. - */ -export interface ToolCallFunction { - /** - * The arguments to call the function with in JSON format. - */ - arguments: string - /** - * The name of the function to call. - */ - name: string -} - -/** - * A tool call generated by the model. - */ -export interface ToolCall { - /** - * The identifier of the tool call. - */ - id: Id - /** - * The function that the model called. - */ - function: ToolCallFunction - /** - * The type of the tool call. - */ - type: string -} - -/** - * @codegen_names string, object - */ -export type MessageContent = string | Array - -/** - * An object representing part of the conversation. - */ -export interface Message { - /** - * The content of the message. - */ - content?: MessageContent - /** - * The role of the message author. - */ - role: string - /** - * The tool call that this message is responding to. - */ - tool_call_id?: Id - /** - * The tool calls generated by the model. - */ - tool_calls?: Array -} - -/** - * The tool choice function. - * - */ -export interface CompletionToolChoiceFunction { - /** - * The name of the function to call. - */ - name: string -} - -/** - * Controls which tool is called by the model. - */ -export interface CompletionToolChoice { - /** - * The type of the tool. - */ - type: string - /** - * The tool choice function. - */ - function: CompletionToolChoiceFunction -} - -/** - * The completion tool function definition. - */ -export interface CompletionToolFunction { - /** - * A description of what the function does. - * This is used by the model to choose when and how to call the function. - */ - description?: string - /** - * The name of the function. - */ - name: string - /** - * The parameters the functional accepts. This should be formatted as a JSON object. - */ - parameters?: UserDefinedValue - /** - * Whether to enable schema adherence when generating the function call. - */ - strict?: boolean -} - -/** - * A list of tools that the model can call. - */ -export interface CompletionTool { - /** - * The type of tool. - */ - type: string - /** - * The function definition. - */ - function: CompletionToolFunction + /** @codegen_name chat_completion_request */ + body: RequestChatCompletionBase }
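For orientation, a hedged sketch of the chat completion payload that the new `body: RequestChatCompletionBase` field carries; the shapes follow the Message and tool interfaces moved out of this file above, and the field names assume the OpenAI-style layout of RequestChatCompletionBase (the function name and arguments are invented):

const chatCompletionBody = {
  messages: [
    {
      role: 'user', // Message.role
      content: 'What is the weather like in Boston today?' // MessageContent as a plain string
    }
  ],
  tools: [
    {
      type: 'function', // CompletionTool.type
      function: {
        name: 'get_current_weather', // hypothetical function name
        description: 'Get the current weather in a given location',
        // CompletionToolFunction.parameters: a JSON object describing the arguments
        parameters: {
          type: 'object',
          properties: { location: { type: 'string' } },
          required: ['location']
        }
      }
    }
  ],
  tool_choice: 'auto' // CompletionToolType accepts a string or a CompletionToolChoice object
}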
diff --git a/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts index 6088a1b0a1..2ab19abf72 100644 --- a/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts +++ b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts @@ -18,8 +18,8 @@ */ import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' +import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' - /** * Perform a chat completion task through the Elastic Inference Service (EIS). * @@ -30,7 +30,7 @@ import { Id } from '@_types/common' * @cluster_privileges manage_inference * @doc_id inference-api-post-eis-chat-completion */ -export interface Request extends RequestChatCompletionBase { +export interface Request extends RequestBase { urls: [ { path: '/_inference/chat_completion/{eis_inference_id}/_stream' methods: ['POST'] } ] path_parts: { /** * The unique identifier of the inference endpoint. */ eis_inference_id: Id } + /** @codegen_name chat_completion_request */ + body: RequestChatCompletionBase } diff --git a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts index 11770daa84..1a6276a850 100644 --- a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts +++ b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts @@ -18,9 +18,12 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + AlibabaCloudServiceSettings, + AlibabaCloudServiceType, + AlibabaCloudTaskSettings, + AlibabaCloudTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -66,7 +69,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`. */ - service: ServiceType + service: AlibabaCloudServiceType /** * Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service. */ @@ -78,77 +81,3 @@ task_settings?: AlibabaCloudTaskSettings } } - -export enum AlibabaCloudTaskType { - completion, - rerank, - space_embedding, - text_embedding -} - -export enum ServiceType { - 'alibabacloud-ai-search' -} - -export class AlibabaCloudServiceSettings { - /** - * A valid API key for the AlibabaCloud AI Search API. - */ - api_key: string - /** - * The name of the host address used for the inference task. - * You can find the host address in the API keys section of the documentation. - * @ext_doc_id alibabacloud-api-keys - */ - host: string - /** - * This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. - * By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. - */ - rate_limit?: RateLimitSetting - /** - * The name of the model service to use for the inference task. - * The following service IDs are available for the `completion` task: - * - * * `ops-qwen-turbo` - * * `qwen-turbo` - * * `qwen-plus` - * * `qwen-max ÷ qwen-max-longcontext` - * - * The following service ID is available for the `rerank` task: - * - * * `ops-bge-reranker-larger` - * - * The following service ID is available for the `sparse_embedding` task: - * - * * `ops-text-sparse-embedding-001` - * - * The following service IDs are available for the `text_embedding` task: - * - * `ops-text-embedding-001` - * `ops-text-embedding-zh-001` - * `ops-text-embedding-en-001` - * `ops-text-embedding-002` - */ - service_id: string - /** - * The name of the workspace used for the inference task. - */ - workspace: string -} - -export class AlibabaCloudTaskSettings { - /** - * For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model. - * Valid values are: - * - * * `ingest` for storing document embeddings in a vector database.
- * * `search` for storing embeddings of search queries run against a vector database to find relevant documents. - */ - input_type?: string - /** - * For a `sparse_embedding` task, it affects whether the token name will be returned in the response. - * It defaults to `false`, which means only the token ID will be returned in the response. - */ - return_token?: boolean -} diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts index f1647d5549..f755f67532 100644 --- a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + AmazonBedrockServiceSettings, + AmazonBedrockServiceType, + AmazonBedrockTaskSettings, + AmazonBedrockTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, integer } from '@_types/Numeric' /** * Create an Amazon Bedrock inference endpoint. @@ -70,7 +72,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `amazonbedrock`. */ - service: ServiceType + service: AmazonBedrockServiceType /** * Settings used to install the inference model. These settings are specific to the `amazonbedrock` service. */ @@ -82,82 +84,3 @@ export interface Request extends RequestBase { task_settings?: AmazonBedrockTaskSettings } } - -export enum AmazonBedrockTaskType { - completion, - text_embedding -} - -export enum ServiceType { - amazonbedrock -} - -export class AmazonBedrockServiceSettings { - /** - * A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests. - */ - access_key: string - /** - * The base model ID or an ARN to a custom model based on a foundational model. - * The base model IDs can be found in the Amazon Bedrock documentation. - * Note that the model ID must be available for the provider chosen and your IAM user must have access to the model. - * @ext_doc_id amazonbedrock-models - */ - model: string - /** - * The model provider for your deployment. - * Note that some providers may support only certain task types. - * Supported providers include: - * - * * `amazontitan` - available for `text_embedding` and `completion` task types - * * `anthropic` - available for `completion` task type only - * * `ai21labs` - available for `completion` task type only - * * `cohere` - available for `text_embedding` and `completion` task types - * * `meta` - available for `completion` task type only - * * `mistral` - available for `completion` task type only - */ - provider?: string - /** - * The region that your model or ARN is deployed in. - * The list of available regions per model can be found in the Amazon Bedrock documentation. - * @ext_doc_id amazonbedrock-models - */ - region: string - /** - * This setting helps to minimize the number of rate limit errors returned from Watsonx. - * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. - */ - rate_limit?: RateLimitSetting - /** - * A valid AWS secret key that is paired with the `access_key`. - * For informationg about creating and managing access and secret keys, refer to the AWS documentation. 
- * @ext_doc_id amazonbedrock-secret-keys - */ - secret_key: string -} - -export class AmazonBedrockTaskSettings { - /** - * For a `completion` task, it sets the maximum number for the output tokens to be generated. - * @server_default 64 - */ - max_new_tokens?: integer - /** - * For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results. - * At temperature 0.0 the model is most deterministic, at temperature 1.0 most random. - * It should not be used if `top_p` or `top_k` is specified. - */ - temperature?: float - /** - * For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability. - * It is only available for anthropic, cohere, and mistral providers. - * It is an alternative to `temperature`; it should not be used if `temperature` is specified. - */ - top_k?: float - /** - * For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens. - * Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence. - * It is an alternative to `temperature`; it should not be used if `temperature` is specified. - */ - top_p?: float -} diff --git a/specification/inference/put_anthropic/PutAnthropicRequest.ts b/specification/inference/put_anthropic/PutAnthropicRequest.ts index 3e8f18a81a..b77de42084 100644 --- a/specification/inference/put_anthropic/PutAnthropicRequest.ts +++ b/specification/inference/put_anthropic/PutAnthropicRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + AnthropicServiceSettings, + AnthropicServiceType, + AnthropicTaskSettings, + AnthropicTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, integer } from '@_types/Numeric' /** * Create an Anthropic inference endpoint. @@ -68,7 +70,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `anthropic`. */ - service: ServiceType + service: AnthropicServiceType /** * Settings used to install the inference model. These settings are specific to the `watsonxai` service. */ @@ -80,56 +82,3 @@ export interface Request extends RequestBase { task_settings?: AnthropicTaskSettings } } - -export enum AnthropicTaskType { - completion -} - -export enum ServiceType { - anthropic -} - -export class AnthropicServiceSettings { - /** - * A valid API key for the Anthropic API. - */ - api_key: string - /** - * The name of the model to use for the inference task. - * Refer to the Anthropic documentation for the list of supported models. - * @ext_doc_id anothropic-models - */ - model_id: string - /** - * This setting helps to minimize the number of rate limit errors returned from Anthropic. - * By default, the `anthropic` service sets the number of requests allowed per minute to 50. - */ - rate_limit?: RateLimitSetting -} - -export class AnthropicTaskSettings { - /** - * For a `completion` task, it is the maximum number of tokens to generate before stopping. - */ - max_tokens: integer - /** - * For a `completion` task, it is the amount of randomness injected into the response. - * For more details about the supported range, refer to Anthropic documentation. 
- * @ext_doc_id anthropic-messages - */ - temperature?: float - /** - * For a `completion` task, it specifies to only sample from the top K options for each subsequent token. - * It is recommended for advanced use cases only. - * You usually only need to use `temperature`. - */ - top_k?: integer - /** - * For a `completion` task, it specifies to use Anthropic's nucleus sampling. - * In nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability. - * You should either alter `temperature` or `top_p`, but not both. - * It is recommended for advanced use cases only. - * You usually only need to use `temperature`. - */ - top_p?: float -} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts index 5e3602f381..0938b0e320 100644 --- a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + AzureAiStudioServiceSettings, + AzureAiStudioServiceType, + AzureAiStudioTaskSettings, + AzureAiStudioTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, integer } from '@_types/Numeric' /** * Create an Azure AI studio inference endpoint. @@ -67,7 +69,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `azureaistudio`. */ - service: ServiceType + service: AzureAiStudioServiceType /** * Settings used to install the inference model. These settings are specific to the `openai` service. */ @@ -79,86 +81,3 @@ export interface Request extends RequestBase { task_settings?: AzureAiStudioTaskSettings } } - -export enum AzureAiStudioTaskType { - completion, - text_embedding -} - -export enum ServiceType { - azureaistudio -} - -export class AzureAiStudioServiceSettings { - /** - * A valid API key of your Azure AI Studio model deployment. - * This key can be found on the overview page for your deployment in the management section of your Azure AI Studio account. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id azureaistudio-api-keys - */ - api_key: string - /** - * The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`. - * The `token` endpoint type is for "pay as you go" endpoints that are billed per token. - * The `realtime` endpoint type is for "real-time" endpoints that are billed per hour of usage. - * @ext_doc_id azureaistudio-endpoint-types - */ - endpoint_type: string - /** - * The target URL of your Azure AI Studio model deployment. - * This can be found on the overview page for your deployment in the management section of your Azure AI Studio account. - */ - target: string - /** - * The model provider for your deployment. 
- * Note that some providers may support only certain task types. - * Supported providers include: - * - * * `cohere` - available for `text_embedding` and `completion` task types - * * `databricks` - available for `completion` task type only - * * `meta` - available for `completion` task type only - * * `microsoft_phi` - available for `completion` task type only - * * `mistral` - available for `completion` task type only - * * `openai` - available for `text_embedding` and `completion` task types - */ - provider: string - /** - * This setting helps to minimize the number of rate limit errors returned from Azure AI Studio. - * By default, the `azureaistudio` service sets the number of requests allowed per minute to 240. - */ - rate_limit?: RateLimitSetting -} - -export class AzureAiStudioTaskSettings { - /** - * For a `completion` task, instruct the inference process to perform sampling. - * It has no effect unless `temperature` or `top_p` is specified. - */ - do_sample?: float - /** - * For a `completion` task, provide a hint for the maximum number of output tokens to be generated. - * @server_default 64 - */ - max_new_tokens?: integer - /** - * For a `completion` task, control the apparent creativity of generated completions with a sampling temperature. - * It must be a number in the range of 0.0 to 2.0. - * It should not be used if `top_p` is specified. - */ - temperature?: float - /** - * For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability. - * It is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0. - * It should not be used if `temperature` is specified. - */ - top_p?: float - /** - * For a `text_embedding` task, specify the user issuing the request. - * This information can be used for abuse detection. - */ - user?: string -} diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts index e3b561861e..37094a09c7 100644 --- a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -18,9 +18,12 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + AzureOpenAIServiceSettings, + AzureOpenAIServiceType, + AzureOpenAITaskSettings, + AzureOpenAITaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -74,7 +77,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `azureopenai`. */ - service: ServiceType + service: AzureOpenAIServiceType /** * Settings used to install the inference model. These settings are specific to the `azureopenai` service. */ @@ -86,67 +89,3 @@ export interface Request extends RequestBase { task_settings?: AzureOpenAITaskSettings } } - -export enum AzureOpenAITaskType { - completion, - text_embedding -} - -export enum ServiceType { - azureopenai -} - -export class AzureOpenAIServiceSettings { - /** - * A valid API key for your Azure OpenAI account. - * You must specify either `api_key` or `entra_id`. - * If you do not provide either or you provide both, you will receive an error when you try to create your model. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. 
- * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id azureopenai-auth - */ - api_key?: string - /** - * The Azure API version ID to use. - * It is recommended to use the latest supported non-preview version. - */ - api_version: string - /** - * The deployment name of your deployed models. - * Your Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription. - * @ext_doc_id azureopenai - */ - deployment_id: string - /** - * A valid Microsoft Entra token. - * You must specify either `api_key` or `entra_id`. - * If you do not provide either or you provide both, you will receive an error when you try to create your model. - * @ext_doc_id azureopenai-auth - */ - entra_id?: string - /** - * This setting helps to minimize the number of rate limit errors returned from Azure. - * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type. - * For `text_embedding`, it is set to `1440`. - * For `completion`, it is set to `120`. - * @ext_doc_id azureopenai-quota-limits - */ - rate_limit?: RateLimitSetting - /** - * The name of your Azure OpenAI resource. - * You can find this from the list of resources in the Azure Portal for your subscription. - * @ext_doc_id azureopenai-portal - */ - resource_name: string -} - -export class AzureOpenAITaskSettings { - /** - * For a `completion` or `text_embedding` task, specify the user issuing the request. - * This information can be used for abuse detection. - */ - user?: string -} diff --git a/specification/inference/put_cohere/PutCohereRequest.ts b/specification/inference/put_cohere/PutCohereRequest.ts index f54a4ef19e..7ede852c87 100644 --- a/specification/inference/put_cohere/PutCohereRequest.ts +++ b/specification/inference/put_cohere/PutCohereRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + CohereServiceSettings, + CohereServiceType, + CohereTaskSettings, + CohereTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create a Cohere inference endpoint. @@ -67,7 +69,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `cohere`. */ - service: ServiceType + service: CohereServiceType /** * Settings used to install the inference model. * These settings are specific to the `cohere` service. @@ -80,115 +82,3 @@ export interface Request extends RequestBase { task_settings?: CohereTaskSettings } } - -export enum CohereTaskType { - completion, - rerank, - text_embedding -} - -export enum ServiceType { - cohere -} - -export enum EmbeddingType { - byte, - float, - int8 -} - -export enum InputType { - classification, - clustering, - ingest, - search -} - -export enum SimilarityType { - cosine, - dot_product, - l2_norm -} - -export enum TruncateType { - END, - NONE, - START -} - -export class CohereServiceSettings { - /** - * A valid API key for your Cohere account. - * You can find or create your Cohere API keys on the Cohere API key settings page. 
- * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id cohere-api-keys - */ - api_key: string - /** - * For a `text_embedding` task, the types of embeddings you want to get back. - * Use `byte` for signed int8 embeddings (this is a synonym of `int8`). - * Use `float` for the default float embeddings. - * Use `int8` for signed int8 embeddings. - * @server_default float - */ - embedding_type?: EmbeddingType - /** - * For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task. - * - * * For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command). - * * For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1). - * * For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed). - * - * The default value for a text embedding task is `embed-english-v2.0`. - */ - model_id?: string - /** - * This setting helps to minimize the number of rate limit errors returned from Cohere. - * By default, the `cohere` service sets the number of requests allowed per minute to 10000. - */ - rate_limit?: RateLimitSetting - /** - * The similarity measure. - * If the `embedding_type` is `float`, the default value is `dot_product`. - * If the `embedding_type` is `int8` or `byte`, the default value is `cosine`. - */ - similarity?: SimilarityType -} - -export class CohereTaskSettings { - /** - * For a `text_embedding` task, the type of input passed to the model. - * Valid values are: - * - * * `classification`: Use it for embeddings passed through a text classifier. - * * `clustering`: Use it for the embeddings run through a clustering algorithm. - * * `ingest`: Use it for storing document embeddings in a vector database. - * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. - * - * IMPORTANT: The `input_type` field is required when using embedding models `v3` and higher. - */ - input_type?: InputType - /** - * For a `rerank` task, return doc text within the results. - */ - return_documents?: boolean - /** - * For a `rerank` task, the number of most relevant documents to return. - * It defaults to the number of the documents. - * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. - */ - top_n?: integer - /** - * For a `text_embedding` task, the method to handle inputs longer than the maximum token length. - * Valid values are: - * - * * `END`: When the input exceeds the maximum input token length, the end of the input is discarded. - * * `NONE`: When the input exceeds the maximum input token length, an error is returned. - * * `START`: When the input exceeds the maximum input token length, the start of the input is discarded. 
- */ - truncate?: TruncateType -} diff --git a/specification/inference/put_eis/PutEisRequest.ts b/specification/inference/put_eis/PutEisRequest.ts index c788009a32..40c898a6d0 100644 --- a/specification/inference/put_eis/PutEisRequest.ts +++ b/specification/inference/put_eis/PutEisRequest.ts @@ -17,7 +17,11 @@ * under the License. */ -import { RateLimitSetting } from '@inference/_types/Services' +import { + EisServiceSettings, + EisServiceType, + EisTaskType +} from '@inference/_types/CommonTypes' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -53,30 +57,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `elastic`. */ - service: ServiceType + service: EisServiceType /** * Settings used to install the inference model. These settings are specific to the `elastic` service. */ service_settings: EisServiceSettings } } - -export enum EisTaskType { - chat_completion -} - -export enum ServiceType { - elastic -} - -export class EisServiceSettings { - /** - * The name of the model to use for the inference task. - */ - model_id: string - /** - * This setting helps to minimize the number of rate limit errors returned. - * By default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`. - */ - rate_limit?: RateLimitSetting -} diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts index 0c8794d9f4..42fe94a298 100644 --- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts +++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts @@ -17,11 +17,15 @@ * under the License. */ -import { AdaptiveAllocations } from '@inference/_types/CommonTypes' +import { + ElasticsearchServiceSettings, + ElasticsearchServiceType, + ElasticsearchTaskSettings, + ElasticsearchTaskType +} from '@inference/_types/CommonTypes' import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create an Elasticsearch inference endpoint. @@ -73,7 +77,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `elasticsearch`. */ - service: ServiceType + service: ElasticsearchServiceType /** * Settings used to install the inference model. These settings are specific to the `elasticsearch` service. */ @@ -85,57 +89,3 @@ export interface Request extends RequestBase { task_settings?: ElasticsearchTaskSettings } } - -export enum ElasticsearchTaskType { - rerank, - sparse_embedding, - text_embedding -} - -export enum ServiceType { - elasticsearch -} - -export class ElasticsearchServiceSettings { - /** - * Adaptive allocations configuration details. - * If `enabled` is true, the number of allocations of the model is set based on the current load the process gets. - * When the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set. - * When the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set. - * If `enabled` is true, do not set the number of allocations manually. - */ - adaptive_allocations?: AdaptiveAllocations - /** - * The deployment identifier for a trained model deployment. 
- * When `deployment_id` is used, the `model_id` is optional. - */ - deployment_id?: string - /** - * The name of the model to use for the inference task. - * It can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client. - * @ext_doc_id eland-import - */ - model_id: string - /** - * The total number of allocations that are assigned to the model across machine learning nodes. - * Increasing this value generally increases the throughput. - * If adaptive allocations are enabled, do not set this value because it's automatically set. - */ - num_allocations?: integer - /** - * The number of threads used by each model allocation during inference. - * Increasing this value generally increases the speed per inference request. - * The inference process is a compute-bound process; `num_threads` must not exceed the number of available allocated processors per node. - * The value must be a power of 2. - * The maximum value is 32. - */ - num_threads: integer -} - -export class ElasticsearchTaskSettings { - /** - * For a `rerank` task, return the document instead of only the index. - * @server_default true - */ - return_documents?: boolean -} diff --git a/specification/inference/put_elser/PutElserRequest.ts b/specification/inference/put_elser/PutElserRequest.ts index 1b03f03bed..41ef57c7aa 100644 --- a/specification/inference/put_elser/PutElserRequest.ts +++ b/specification/inference/put_elser/PutElserRequest.ts @@ -17,11 +17,14 @@ * under the License. */ -import { AdaptiveAllocations } from '@inference/_types/CommonTypes' +import { + ElserServiceSettings, + ElserServiceType, + ElserTaskType +} from '@inference/_types/CommonTypes' import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create an ELSER inference endpoint. @@ -74,46 +77,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `elser`. */ - service: ServiceType + service: ElserServiceType /** * Settings used to install the inference model. These settings are specific to the `elser` service. */ service_settings: ElserServiceSettings } } - -export enum ElserTaskType { - sparse_embedding -} - -export enum ServiceType { - elser -} - -export class ElserServiceSettings { - /** - * Adaptive allocations configuration details. - * If `enabled` is true, the number of allocations of the model is set based on the current load the process gets. - * When the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set. - * When the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set. - * If `enabled` is true, do not set the number of allocations manually. - */ - adaptive_allocations?: AdaptiveAllocations - /** - * The total number of allocations this model is assigned across machine learning nodes. - * Increasing this value generally increases the throughput. - * If adaptive allocations are enabled, do not set this value because it's automatically set. - */ - num_allocations: integer - /** - * The number of threads used by each model allocation during inference. - * Increasing this value generally increases the speed per inference request. 
- * The inference process is a compute-bound process; `num_threads` must not exceed the number of available allocated processors per node. - * The value must be a power of 2. - * The maximum value is 32. - * - * > info - * > If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1. - */ - num_threads: integer -} diff --git a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts index aeac77c0b7..87a43555e1 100644 --- a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts +++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts @@ -18,9 +18,11 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + GoogleAiServiceType, + GoogleAiStudioServiceSettings, + GoogleAiStudioTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -66,37 +68,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `googleaistudio`. */ - service: ServiceType + service: GoogleAiServiceType /** * Settings used to install the inference model. These settings are specific to the `googleaistudio` service. */ service_settings: GoogleAiStudioServiceSettings } } - -export enum GoogleAiStudioTaskType { - completion, - text_embedding -} - -export enum ServiceType { - googleaistudio -} - -export class GoogleAiStudioServiceSettings { - /** - * A valid API key of your Google Gemini account. - */ - api_key: string - /** - * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. - * @ext_doc_id googleaistudio-models - */ - model_id: string - /** - * This setting helps to minimize the number of rate limit errors returned from Google AI Studio. - * By default, the `googleaistudio` service sets the number of requests allowed per minute to 360. - */ - rate_limit?: RateLimitSetting -} diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts index 978a384d20..17276aa4eb 100644 --- a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts +++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + GoogleVertexAIServiceSettings, + GoogleVertexAIServiceType, + GoogleVertexAITaskSettings, + GoogleVertexAITaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create a Google Vertex AI inference endpoint. @@ -67,7 +69,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `googlevertexai`. */ - service: ServiceType + service: GoogleVertexAIServiceType /** * Settings used to install the inference model. These settings are specific to the `googlevertexai` service. 
*/ @@ -79,51 +81,3 @@ export interface Request extends RequestBase { task_settings?: GoogleVertexAITaskSettings } } - -export enum GoogleVertexAITaskType { - rerank, - text_embedding -} - -export enum ServiceType { - googlevertexai -} - -export class GoogleVertexAIServiceSettings { - /** - * The name of the location to use for the inference task. - * Refer to the Google documentation for the list of supported locations. - * @ext_doc_id googlevertexai-locations - */ - location: string - /** - * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. - * @ext_doc_id googlevertexai-models - */ - model_id: string - /** - * The name of the project to use for the inference task. - */ - project_id: string - /** - * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. - * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30,000. - */ - rate_limit?: RateLimitSetting - /** - * A valid service account in JSON format for the Google Vertex AI API. - */ - service_account_json: string -} - -export class GoogleVertexAITaskSettings { - /** - * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. - */ - auto_truncate?: boolean - /** - * For a `rerank` task, the number of the top N documents that should be returned. - */ - top_n?: integer -} diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 5660473643..c32861e39d 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -18,9 +18,11 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + HuggingFaceServiceSettings, + HuggingFaceServiceType, + HuggingFaceTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -80,41 +82,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `hugging_face`. */ - service: ServiceType + service: HuggingFaceServiceType /** * Settings used to install the inference model. These settings are specific to the `hugging_face` service. */ service_settings: HuggingFaceServiceSettings } } - -export enum HuggingFaceTaskType { - text_embedding -} - -export enum ServiceType { - hugging_face -} - -export class HuggingFaceServiceSettings { - /** - * A valid access token for your HuggingFace account. - * You can create or find your access tokens on the HuggingFace settings page. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id huggingface-tokens - */ - api_key: string - /** - * This setting helps to minimize the number of rate limit errors returned from Hugging Face. - * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. - */ - rate_limit?: RateLimitSetting - /** - * The URL endpoint to use for the requests. 
- */ - url: string -} diff --git a/specification/inference/put_jinaai/PutJinaAiRequest.ts b/specification/inference/put_jinaai/PutJinaAiRequest.ts index 91486dc832..68cca23146 100644 --- a/specification/inference/put_jinaai/PutJinaAiRequest.ts +++ b/specification/inference/put_jinaai/PutJinaAiRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + JinaAIServiceSettings, + JinaAIServiceType, + JinaAITaskSettings, + JinaAITaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create a JinaAI inference endpoint. @@ -70,7 +72,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `jinaai`. */ - service: ServiceType + service: JinaAIServiceType /** * Settings used to install the inference model. These settings are specific to the `jinaai` service. */ @@ -82,79 +84,3 @@ export interface Request extends RequestBase { task_settings?: JinaAITaskSettings } } - -export enum JinaAITaskType { - rerank, - text_embedding -} - -export enum ServiceType { - jinaai -} - -export enum SimilarityType { - cosine, - dot_product, - l2_norm -} - -export enum TextEmbeddingTask { - classification, - clustering, - ingest, - search -} - -export class JinaAIServiceSettings { - /** - * A valid API key of your JinaAI account. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id jinaAi-embeddings - */ - api_key: string - /** - * The name of the model to use for the inference task. - * For a `rerank` task, it is required. - * For a `text_embedding` task, it is optional. - */ - model_id?: string - /** - * This setting helps to minimize the number of rate limit errors returned from JinaAI. - * By default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types. - * @ext_doc_id jinaAi-rate-limit - */ - rate_limit?: RateLimitSetting - /** - * For a `text_embedding` task, the similarity measure. One of `cosine`, `dot_product`, or `l2_norm`. - * The default value varies with the embedding type. - * For example, a float embedding type uses a `dot_product` similarity measure by default. - */ - similarity?: SimilarityType -} - -export class JinaAITaskSettings { - /** - * For a `rerank` task, return the doc text within the results. - */ - return_documents?: boolean - /** - * For a `text_embedding` task, the task passed to the model. - * Valid values are: - * - * * `classification`: Use it for embeddings passed through a text classifier. - * * `clustering`: Use it for the embeddings run through a clustering algorithm. - * * `ingest`: Use it for storing document embeddings in a vector database. - * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. - */ - task?: TextEmbeddingTask - /** - * For a `rerank` task, the number of most relevant documents to return. - * It defaults to the number of documents. 
- * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. - */ - top_n?: integer -} diff --git a/specification/inference/put_mistral/PutMistralRequest.ts b/specification/inference/put_mistral/PutMistralRequest.ts index 4aaa32acb9..5bd69fd21d 100644 --- a/specification/inference/put_mistral/PutMistralRequest.ts +++ b/specification/inference/put_mistral/PutMistralRequest.ts @@ -18,12 +18,13 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + MistralServiceSettings, + MistralServiceType, + MistralTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create a Mistral inference endpoint. @@ -68,47 +69,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `mistral`. */ - service: ServiceType + service: MistralServiceType /** * Settings used to install the inference model. These settings are specific to the `mistral` service. */ service_settings: MistralServiceSettings } } - -export enum MistralTaskType { - text_embedding -} - -export enum ServiceType { - mistral -} - -export class MistralServiceSettings { - /** - * A valid API key of your Mistral account. - * You can find your Mistral API keys or you can create a new one on the API Keys page. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id mistral-api-keys - */ - api_key: string - /** - * The maximum number of tokens per input before chunking occurs. - */ - max_input_tokens?: integer - /** - * The name of the model to use for the inference task. - * Refer to the Mistral models documentation for the list of available text embedding models. - * @ext_doc_id mistral-api-models - */ - model: string - /** - * This setting helps to minimize the number of rate limit errors returned from the Mistral API. - * By default, the `mistral` service sets the number of requests allowed per minute to 240. - */ - rate_limit?: RateLimitSetting -} diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts index 3453d0cff3..d45326e308 100644 --- a/specification/inference/put_openai/PutOpenAiRequest.ts +++ b/specification/inference/put_openai/PutOpenAiRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + OpenAIServiceSettings, + OpenAIServiceType, + OpenAITaskSettings, + OpenAITaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { integer } from '@_types/Numeric' /** * Create an OpenAI inference endpoint. @@ -68,7 +70,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `openai`. 
*/ - service: ServiceType + service: OpenAIServiceType /** * Settings used to install the inference model. These settings are specific to the `openai` service. */ @@ -80,65 +82,3 @@ export interface Request extends RequestBase { task_settings?: OpenAITaskSettings } } - -export enum OpenAITaskType { - chat_completion, - completion, - text_embedding -} - -export enum ServiceType { - openai -} - -export class OpenAIServiceSettings { - /** - * A valid API key of your OpenAI account. - * You can find your OpenAI API keys in your OpenAI account under the API keys section. - * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id openai-api-keys - */ - api_key: string - /** - * The number of dimensions the resulting output embeddings should have. - * It is supported only in `text-embedding-3` and later models. - * If it is not set, the OpenAI defined default for the model is used. - */ - dimensions?: integer - /** - * The name of the model to use for the inference task. - * Refer to the OpenAI documentation for the list of available text embedding models. - * @ext_doc_id openai-models - */ - model_id: string - /** - * The unique identifier for your organization. - * You can find the Organization ID in your OpenAI account under *Settings > Organizations*. - */ - organization_id?: string - /** - * This setting helps to minimize the number of rate limit errors returned from OpenAI. - * The `openai` service sets a default number of requests allowed per minute depending on the task type. - * For `text_embedding`, it is set to `3000`. - * For `completion`, it is set to `500`. - */ - rate_limit?: RateLimitSetting - /** - * The URL endpoint to use for the requests. - * It can be changed for testing purposes. - * @server_default https://api.openai.com/v1/embeddings. - */ - url?: string -} - -export class OpenAITaskSettings { - /** - * For a `completion` or `text_embedding` task, specify the user issuing the request. - * This information can be used for abuse detection. - */ - user?: string -} diff --git a/specification/inference/put_voyageai/PutVoyageAIRequest.ts b/specification/inference/put_voyageai/PutVoyageAIRequest.ts index ef016b1ff7..0e29dec2ce 100644 --- a/specification/inference/put_voyageai/PutVoyageAIRequest.ts +++ b/specification/inference/put_voyageai/PutVoyageAIRequest.ts @@ -18,12 +18,14 @@ */ import { - InferenceChunkingSettings, - RateLimitSetting -} from '@inference/_types/Services' + VoyageAIServiceSettings, + VoyageAIServiceType, + VoyageAITaskSettings, + VoyageAITaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, integer } from '@_types/Numeric' /** * Create a VoyageAI inference endpoint. @@ -63,7 +65,7 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `voyageai`. */ - service: ServiceType + service: VoyageAIServiceType /** * Settings used to install the inference model. These settings are specific to the `voyageai` service. 
*/ @@ -75,71 +77,3 @@ export interface Request extends RequestBase { task_settings?: VoyageAITaskSettings } } - -export enum VoyageAITaskType { - text_embedding, - rerank -} - -export enum ServiceType { - voyageai -} - -export class VoyageAIServiceSettings { - /** - * The number of dimensions for resulting output embeddings. - * This setting maps to `output_dimension` in the VoyageAI documentation. - * Only for the `text_embedding` task type. - * @ext_doc_id voyageai-embeddings - */ - dimensions?: integer - /** - * The name of the model to use for the inference task. - * Refer to the VoyageAI documentation for the list of available text embedding and rerank models. - * @ext_doc_id voyageai-embeddings - * @ext_doc_id voyageai-rerank - */ - model_id: string - /** - * This setting helps to minimize the number of rate limit errors returned from VoyageAI. - * The `voyageai` service sets a default number of requests allowed per minute depending on the task type. - * For both `text_embedding` and `rerank`, it is set to `2000`. - */ - rate_limit?: RateLimitSetting - /** - * The data type for the embeddings to be returned. - * This setting maps to `output_dtype` in the VoyageAI documentation. - * Permitted values: float, int8, bit. - * `int8` is a synonym of `byte` in the VoyageAI documentation. - * `bit` is a synonym of `binary` in the VoyageAI documentation. - * Only for the `text_embedding` task type. - * @ext_doc_id voyageai-embeddings - */ - embedding_type?: float -} - -export class VoyageAITaskSettings { - /** - * Type of the input text. - * Permitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation). - * Only for the `text_embedding` task type. - */ - input_type?: string - /** - * Whether to return the source documents in the response. - * Only for the `rerank` task type. - * @server_default false - */ - return_documents?: boolean - /** - * The number of most relevant documents to return. - * If not specified, the reranking results of all documents will be returned. - * Only for the `rerank` task type. - */ - top_k?: integer - /** - * Whether to truncate the input texts to fit within the context length. - * @server_default true - */ - truncation?: boolean -} diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts index 2721524316..3718ee1c94 100644 --- a/specification/inference/put_watsonx/PutWatsonxRequest.ts +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -17,7 +17,11 @@ * under the License. */ -import { RateLimitSetting } from '@inference/_types/Services' +import { + WatsonxServiceSettings, + WatsonxServiceType, + WatsonxTaskType +} from '@inference/_types/CommonTypes' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -61,57 +65,10 @@ export interface Request extends RequestBase { /** * The type of service supported for the specified task type. In this case, `watsonxai`. */ - service: ServiceType + service: WatsonxServiceType /** * Settings used to install the inference model. These settings are specific to the `watsonxai` service. */ service_settings: WatsonxServiceSettings } } - -export enum WatsonxTaskType { - text_embedding -} - -export enum ServiceType { - watsonxai -} - -export class WatsonxServiceSettings { - /** - * A valid API key of your Watsonx account. - * You can find your Watsonx API keys or you can create a new one on the API keys page. 
- * - * IMPORTANT: You need to provide the API key only once, during the inference model creation. - * The get inference endpoint API does not retrieve your API key. - * After creating the inference model, you cannot change the associated API key. - * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. - * @ext_doc_id watsonx-api-keys - */ - api_key: string - /** - * A version parameter that takes a version date in the format of `YYYY-MM-DD`. - * For the active version date parameters, refer to the Watsonx documentation. - * @ext_doc_id watsonx-api-version - */ - api_version: string - /** - * The name of the model to use for the inference task. - * Refer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models. - * @ext_doc_id watsonx-api-models - */ - model_id: string - /** - * The identifier of the IBM Cloud project to use for the inference task. - */ - project_id: string - /** - * This setting helps to minimize the number of rate limit errors returned from Watsonx. - * By default, the `watsonxai` service sets the number of requests allowed per minute to 120. - */ - rate_limit?: RateLimitSetting - /** - * The URL of the inference endpoint that you created on Watsonx. - */ - url: string -}
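Every hunk above applies the same refactor: each `Put*Request.ts` file loses its file-local `ServiceType` and task-type enums and its settings classes, and instead imports provider-prefixed equivalents from the shared `@inference/_types/CommonTypes` module. The destination file is not part of this diff, so what follows is only a minimal sketch of how the relocated Watsonx declarations would presumably read in `specification/inference/_types/CommonTypes.ts`: the path is inferred from the new import specifiers, the doc comments are abbreviated, and apart from the rename of the file-local `ServiceType` enum to `WatsonxServiceType`, the bodies are copied from the deleted code above.

// Sketch of the relocated Watsonx declarations (assumed destination:
// specification/inference/_types/CommonTypes.ts).
import { RateLimitSetting } from '@inference/_types/Services'

export enum WatsonxTaskType {
  text_embedding
}

// Renamed from the file-local `ServiceType`; the provider prefix lets the
// many formerly file-local service-type enums coexist in one shared module.
export enum WatsonxServiceType {
  watsonxai
}

export class WatsonxServiceSettings {
  /** A valid API key of your Watsonx account (provide it once at creation; the get inference endpoint API does not return it). */
  api_key: string
  /** A version date in the format of `YYYY-MM-DD`. */
  api_version: string
  /** The name of the model to use for the inference task. */
  model_id: string
  /** The identifier of the IBM Cloud project to use for the inference task. */
  project_id: string
  /** By default, the `watsonxai` service allows 120 requests per minute. */
  rate_limit?: RateLimitSetting
  /** The URL of the inference endpoint that you created on Watsonx. */
  url: string
}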
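The consolidation also explains the `$ref` churn in the generated OpenAPI document: every schema now resolves under the single `inference._types` namespace (for example, `inference._types:WatsonxServiceSettings`) instead of a per-endpoint namespace such as `inference.put_watsonx:WatsonxServiceSettings`. For illustration only, a request body that satisfies the relocated Watsonx types might look like the sketch below; every concrete value is a hypothetical placeholder, not something taken from this diff.

// Hypothetical body for PUT _inference/{task_type}/{inference_id} with the
// `watsonxai` service; all values below are illustrative placeholders.
const putWatsonxBody = {
  service: 'watsonxai', // the single member of WatsonxServiceType
  service_settings: {
    api_key: '<ibm-cloud-api-key>', // write-only; not returned by later GETs
    api_version: '2024-03-14', // a YYYY-MM-DD version date
    model_id: '<watsonx-text-embedding-model>',
    project_id: '<ibm-cloud-project-id>',
    url: 'https://<your-watsonx-endpoint>' // the endpoint created on Watsonx
  }
}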