diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 06d0639047..3e421ea017 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17421,7 +17421,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. 
\nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `elastic` or `googlevertexai` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -79710,7 +79710,9 @@ "type": "string", "enum": [ "rerank", - "text_embedding" + "text_embedding", + "completion", + "chat_completion" ] }, "inference._types.GoogleVertexAIServiceType": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index c79f5a2242..e367041237 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9480,7 +9480,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. 
\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -50972,7 +50972,9 @@ "type": "string", "enum": [ "rerank", - "text_embedding" + "text_embedding", + "completion", + "chat_completion" ] }, "inference._types.GoogleVertexAIServiceType": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 207419a4e4..59ab1534fc 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4506,7 +4506,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html", "name": "inference.chat_completion_unified", @@ -27588,7 +27588,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -104617,7 +104617,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L871-L873" + "specLocation": "inference/_types/CommonTypes.ts#L873-L875" }, { "kind": "enum", @@ -104627,13 +104627,19 @@ }, { "name": "text_embedding" + }, + { + "name": "completion" + }, + { + "name": "chat_completion" } ], "name": { "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L866-L869" + "specLocation": "inference/_types/CommonTypes.ts#L866-L871" }, { "kind": "enum", @@ -104646,7 +104652,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L904" + "specLocation": "inference/_types/CommonTypes.ts#L904-L906" }, { "kind": "enum", @@ -104659,7 +104665,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L900-L902" }, { "kind": "enum", @@ -104672,7 +104678,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/CommonTypes.ts#L965-L967" + "specLocation": "inference/_types/CommonTypes.ts#L967-L969" }, { "kind": "enum", @@ -104691,7 +104697,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L969-L973" + "specLocation": "inference/_types/CommonTypes.ts#L971-L975" }, { "kind": "enum", @@ -104707,7 +104713,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L960-L963" + "specLocation": "inference/_types/CommonTypes.ts#L962-L965" }, { "kind": "enum", @@ -104729,7 +104735,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L975-L980" + "specLocation": "inference/_types/CommonTypes.ts#L977-L982" }, { "codegenNames": [ @@ -104811,7 +104817,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1017" + "specLocation": "inference/_types/CommonTypes.ts#L1017-L1019" }, { "kind": "enum", @@ -104824,7 +104830,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1013-L1015" }, { "kind": "enum", @@ -104837,7 +104843,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1079" + "specLocation": "inference/_types/CommonTypes.ts#L1079-L1081" }, { "kind": "enum", @@ -104856,7 +104862,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1071-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1073-L1077" }, { "kind": "type_alias", @@ -104981,7 +104987,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1147" + "specLocation": 
"inference/_types/CommonTypes.ts#L1147-L1149" }, { "kind": "enum", @@ -104997,7 +105003,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1140-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1142-L1145" }, { "kind": "enum", @@ -105010,7 +105016,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1192-L1194" + "specLocation": "inference/_types/CommonTypes.ts#L1194-L1196" }, { "kind": "enum", @@ -105023,7 +105029,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1190-L1192" }, { "kind": "enum", @@ -126885,7 +126891,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L875-L896" + "specLocation": "inference/_types/CommonTypes.ts#L877-L898" }, { "kind": "interface", @@ -126947,7 +126953,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L906-L935" + "specLocation": "inference/_types/CommonTypes.ts#L908-L937" }, { "kind": "interface", @@ -126993,7 +126999,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L937-L958" + "specLocation": "inference/_types/CommonTypes.ts#L939-L960" }, { "inherits": { @@ -127095,7 +127101,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L982-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L984-L1011" }, { "kind": "interface", @@ -127182,7 +127188,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1019-L1061" + "specLocation": "inference/_types/CommonTypes.ts#L1021-L1063" }, { "kind": "interface", @@ -127204,7 +127210,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1063-L1069" + "specLocation": "inference/_types/CommonTypes.ts#L1065-L1071" }, { "kind": "interface", @@ -127268,7 +127274,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1081-L1112" + "specLocation": 
"inference/_types/CommonTypes.ts#L1083-L1114" }, { "kind": "interface", @@ -127328,7 +127334,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1114-L1138" + "specLocation": "inference/_types/CommonTypes.ts#L1116-L1140" }, { "kind": "interface", @@ -127416,7 +127422,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1151-L1188" }, { "description": "Defines the response for a rerank request.", diff --git a/output/schema/schema.json b/output/schema/schema.json index 48676ec857..42a3a65bbe 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9186,7 +9186,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html", "name": "inference.chat_completion_unified", @@ -153510,7 +153510,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L871-L873" + "specLocation": "inference/_types/CommonTypes.ts#L873-L875" }, { "kind": "interface", @@ -153554,13 +153554,19 @@ }, { "name": "text_embedding" + }, + { + "name": "completion" + }, + { + "name": "chat_completion" } ], "name": { "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L866-L869" + "specLocation": "inference/_types/CommonTypes.ts#L866-L871" }, { "kind": "interface", @@ -153608,7 +153614,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L875-L896" + "specLocation": "inference/_types/CommonTypes.ts#L877-L898" }, { "kind": "enum", @@ -153621,7 +153627,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L904" + "specLocation": "inference/_types/CommonTypes.ts#L904-L906" }, { "kind": "enum", @@ -153634,7 +153640,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L900-L902" }, { "kind": "interface", @@ -154037,7 +154043,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L906-L935" + "specLocation": "inference/_types/CommonTypes.ts#L908-L937" }, { "kind": "enum", @@ -154050,7 +154056,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L965-L967" + "specLocation": "inference/_types/CommonTypes.ts#L967-L969" }, { "kind": "enum", @@ -154069,7 +154075,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L969-L973" + "specLocation": "inference/_types/CommonTypes.ts#L971-L975" }, { "kind": "interface", @@ -154115,7 +154121,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L937-L958" + "specLocation": "inference/_types/CommonTypes.ts#L939-L960" }, { "kind": "enum", @@ -154131,7 +154137,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L960-L963" + "specLocation": "inference/_types/CommonTypes.ts#L962-L965" }, { "kind": "enum", @@ -154153,7 +154159,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L975-L980" + "specLocation": "inference/_types/CommonTypes.ts#L977-L982" }, { "kind": "interface", @@ -154311,7 +154317,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L982-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L984-L1011" }, { "kind": "enum", @@ -154324,7 +154330,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1017" + "specLocation": "inference/_types/CommonTypes.ts#L1017-L1019" }, { "kind": "enum", @@ -154337,7 +154343,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1013-L1015" }, { "kind": "interface", @@ -154424,7 +154430,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1019-L1061" + "specLocation": "inference/_types/CommonTypes.ts#L1021-L1063" }, { "kind": "enum", @@ -154437,7 +154443,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1079" + "specLocation": "inference/_types/CommonTypes.ts#L1079-L1081" }, { "kind": "interface", @@ -154459,7 +154465,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1063-L1069" + "specLocation": "inference/_types/CommonTypes.ts#L1065-L1071" }, { "kind": "enum", @@ -154478,7 +154484,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1071-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1073-L1077" }, { "kind": "interface", @@ -155088,7 +155094,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1081-L1112" + "specLocation": "inference/_types/CommonTypes.ts#L1083-L1114" }, { "kind": "enum", @@ -155101,7 +155107,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1147" + "specLocation": "inference/_types/CommonTypes.ts#L1147-L1149" }, { "kind": "interface", @@ -155161,7 +155167,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1114-L1138" + "specLocation": "inference/_types/CommonTypes.ts#L1116-L1140" }, { "kind": "enum", @@ -155177,7 +155183,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1140-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1142-L1145" }, { "kind": "interface", @@ -155265,7 +155271,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1151-L1188" 
}, { "kind": "enum", @@ -155278,7 +155284,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1192-L1194" + "specLocation": "inference/_types/CommonTypes.ts#L1194-L1196" }, { "kind": "enum", @@ -155291,7 +155297,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1190-L1192" }, { "kind": "request", @@ -155309,7 +155315,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 6a0d18c925..31125bc370 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13430,7 +13430,7 @@ export interface InferenceGoogleVertexAITaskSettings { top_n?: integer } -export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' +export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' export interface InferenceHuggingFaceServiceSettings { api_key: string diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 5c0757abd2..2962cade79 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -865,7 +865,9 @@ export class GoogleVertexAITaskSettings { export enum GoogleVertexAITaskType { rerank, - text_embedding + text_embedding, + completion, + chat_completion } export enum GoogleVertexAIServiceType { diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 3a5498defd..891d7a2c3d 100644 --- 
a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -25,7 +25,7 @@ import { Duration } from '@_types/Time' * Perform chat completion inference * * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. - * It only works with the `chat_completion` task type for `openai` and `elastic` inference services. + * It only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services. * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. * For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.