Add examples for chat_completion and completion tasks using various Google Model Garden providers with updated URL formats

Jan-Kazlouski-elastic · Jan-Kazlouski-elastic · commit 2c4bcb59c42b · 2025-10-29T12:22:26.000Z
diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -1446,6 +1446,7 @@ export class GoogleVertexAIServiceSettings {
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).
    * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `url`.
    */
   url?: string
   /**
@@ -1455,6 +1456,7 @@ export class GoogleVertexAIServiceSettings {
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).
    * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.
    * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.
    */
   streaming_url?: string
   /**
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
@@ -1,14 +1,13 @@
-summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+summary: A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
 # type: "request"
 value: |-
   {
       "service": "googlevertexai",
       "service_settings": {
-          "provider": "mistral",
-          "model_id": "mistral-small-2503",
+          "provider": "meta",
           "service_account_json": "service-account-json",
-          "streaming_url": "https://url:streamRawPredict"
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
       }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
@@ -1,14 +1,13 @@
-summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
-method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+summary: A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
 # type: "request"
 value: |-
   {
       "service": "googlevertexai",
       "service_settings": {
-          "provider": "ai21",
+          "provider": "hugging_face",
           "service_account_json": "service-account-json",
-          "url": "https://url:rawPredict",
-          "streaming_url": "https://url:streamRawPredict"
+          "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
       }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
@@ -1,13 +1,13 @@
-summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.
-method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+summary: A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
 # type: "request"
 value: |-
   {
       "service": "googlevertexai",
       "service_settings": {
-          "provider": "ai21",
+          "provider": "hugging_face",
           "service_account_json": "service-account-json",
-          "streaming_url": "https://url:streamRawPredict"
+          "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
       }
   }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
@@ -0,0 +1,15 @@
+summary: A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "model_id": "mistral-small-2503",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
@@ -0,0 +1,14 @@
+summary: A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "model_id": "mistral-small-2503",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
@@ -0,0 +1,14 @@
+summary: A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "ai21",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's serverless model hosted on Google Model Garden with single streaming URL provided. See the AI21 model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "ai21",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks
-description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.
+summary: A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Anthropic model documentation for instructions on how to construct URLs.
 method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion'
 # type: "request"
 value: |-
@@ -8,8 +8,8 @@ value: |-
       "service_settings": {
           "provider": "anthropic",
           "service_account_json": "service-account-json",
-          "url": "https://url:rawPredict",
-          "streaming_url": "https://streaming_url:streamRawPredict"
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
       },
       "task_settings": {
           "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided
-description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.
+summary: A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion'
 # type: "request"
 value: |-
@@ -8,7 +8,7 @@ value: |-
       "service_settings": {
           "provider": "anthropic",
           "service_account_json": "service-account-json",
-          "streaming_url": "https://streaming_url:streamRawPredict"
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
       },
       "task_settings": {
           "max_tokens": 128
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml
diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml

Original file line number	Diff line number	Diff line change
`@@ -1,13 +1,13 @@`
`1`		`-summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided`
`2`		-description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.
`3`		`-method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'`
	`1`	`+summary: A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided`
	`2`	+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
	`3`	`+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'`
`4`	`4`	`# type: "request"`
`5`	`5`	`value: \|-`
`6`	`6`	`{`
`7`	`7`	`"service": "googlevertexai",`
`8`	`8`	`"service_settings": {`
`9`		`- "provider": "ai21",`
	`9`	`+ "provider": "hugging_face",`
`10`	`10`	`"service_account_json": "service-account-json",`
`11`		`- "streaming_url": "https://url:streamRawPredict"`
	`11`	`+ "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"`
`12`	`12`	`}`
`13`	`13`	`}`