diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 0ccf8d2abd..8cbb2651b7 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22746,20 +22746,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } @@ -102977,11 +103017,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { @@ -103030,7 +103070,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index cf5429bf8d..32d051c61b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13728,20 +13728,60 @@ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "summary": "A completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } } } @@ -67057,11 +67097,11 @@ ] }, "url": { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "streaming_url": { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "type": "string" }, "location": { @@ -67110,7 +67150,11 @@ "type": "string", "enum": [ "google", - "anthropic" + "anthropic", + "meta", + "hugging_face", + "mistral", + "ai21" ] }, "inference._types.GoogleVertexAITaskSettings": { diff --git a/output/schema/schema.json b/output/schema/schema.json index a4233bd1db..49ba042117 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -173326,13 +173326,25 @@ }, { "name": "anthropic" + }, + { + "name": "meta" + }, + { + "name": "hugging_face" + }, + { + "name": "mistral" + }, + { + "name": "ai21" } ], "name": { "name": "GoogleModelGardenProvider", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1498-L1501" + "specLocation": "inference/_types/CommonTypes.ts#L1500-L1507" }, { "kind": "interface", @@ -173354,7 +173366,7 @@ } }, { - "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "url", "required": false, "type": { @@ -173366,7 +173378,7 @@ } }, { - "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.\nCertain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).", "name": "streaming_url", "required": false, "type": { @@ -173456,7 +173468,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1433-L1496" + "specLocation": "inference/_types/CommonTypes.ts#L1433-L1498" }, { "kind": "enum", @@ -173469,7 +173481,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1542-L1544" + "specLocation": "inference/_types/CommonTypes.ts#L1548-L1550" }, { "kind": "interface", @@ -173531,7 +173543,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1503-L1526" + "specLocation": "inference/_types/CommonTypes.ts#L1509-L1532" }, { "kind": "enum", @@ -173553,7 +173565,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1535-L1540" + "specLocation": "inference/_types/CommonTypes.ts#L1541-L1546" }, { "kind": "interface", @@ -173615,7 +173627,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1546-L1578" + "specLocation": "inference/_types/CommonTypes.ts#L1552-L1584" }, { "kind": "enum", @@ -173628,7 +173640,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1599-L1601" + "specLocation": "inference/_types/CommonTypes.ts#L1605-L1607" }, { "kind": "interface", @@ -173662,7 +173674,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1580-L1590" + "specLocation": "inference/_types/CommonTypes.ts#L1586-L1596" }, { "kind": "enum", @@ -173684,7 +173696,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1592-L1597" + "specLocation": "inference/_types/CommonTypes.ts#L1598-L1603" }, { "kind": "interface", @@ -174916,7 +174928,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1603-L1632" + "specLocation": "inference/_types/CommonTypes.ts#L1609-L1638" }, { "kind": "enum", @@ -174929,7 +174941,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1662-L1664" + "specLocation": "inference/_types/CommonTypes.ts#L1668-L1670" }, { "kind": "enum", @@ -174948,7 +174960,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1666-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" }, { "kind": "interface", @@ -174994,7 +175006,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1634-L1655" + "specLocation": "inference/_types/CommonTypes.ts#L1640-L1661" }, { "kind": "enum", @@ -175010,7 +175022,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1657-L1660" + "specLocation": "inference/_types/CommonTypes.ts#L1663-L1666" }, { "kind": "enum", @@ -175032,7 +175044,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1677" + "specLocation": "inference/_types/CommonTypes.ts#L1678-L1683" }, { "kind": "interface", @@ -175104,7 +175116,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1679-L1709" + "specLocation": "inference/_types/CommonTypes.ts#L1685-L1715" }, { "kind": "enum", @@ -175117,7 +175129,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1717-L1719" + "specLocation": "inference/_types/CommonTypes.ts#L1723-L1725" }, { "kind": "enum", @@ -175136,7 +175148,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1721-L1725" + "specLocation": "inference/_types/CommonTypes.ts#L1727-L1731" }, { "kind": "enum", @@ -175155,7 +175167,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1711-L1715" + "specLocation": "inference/_types/CommonTypes.ts#L1717-L1721" }, { "kind": "interface", @@ -175313,7 +175325,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1727-L1754" + "specLocation": "inference/_types/CommonTypes.ts#L1733-L1760" }, { "kind": "enum", @@ -175326,7 +175338,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1762-L1764" + "specLocation": "inference/_types/CommonTypes.ts#L1768-L1770" }, { "kind": "enum", @@ -175345,7 +175357,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1756-L1760" + "specLocation": "inference/_types/CommonTypes.ts#L1762-L1766" }, { "kind": "interface", @@ -175432,7 +175444,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1766-L1808" + "specLocation": "inference/_types/CommonTypes.ts#L1772-L1814" }, { "kind": "enum", @@ -175445,7 +175457,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1841-L1843" }, { "kind": "interface", @@ -175475,7 +175487,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1810-L1827" + "specLocation": "inference/_types/CommonTypes.ts#L1816-L1833" }, { "kind": "enum", @@ -175494,7 +175506,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1829-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1835-L1839" }, { "kind": "interface", @@ -176340,7 +176352,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1528-L1533" + "specLocation": "inference/_types/CommonTypes.ts#L1534-L1539" }, { "kind": "interface", @@ -176486,7 +176498,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1870" + "specLocation": "inference/_types/CommonTypes.ts#L1845-L1876" }, { "kind": "enum", @@ -176499,7 +176511,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1903-L1905" + "specLocation": "inference/_types/CommonTypes.ts#L1909-L1911" }, { "kind": "interface", @@ -176559,7 +176571,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1872-L1896" + "specLocation": "inference/_types/CommonTypes.ts#L1878-L1902" }, { "kind": "enum", @@ -176575,7 +176587,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1898-L1901" + "specLocation": "inference/_types/CommonTypes.ts#L1904-L1907" }, { "kind": "interface", @@ -176663,7 +176675,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1907-L1945" + "specLocation": "inference/_types/CommonTypes.ts#L1913-L1951" }, { "kind": "enum", @@ -176676,7 +176688,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1953-L1955" + "specLocation": "inference/_types/CommonTypes.ts#L1959-L1961" }, { "kind": "enum", @@ -176695,7 +176707,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1947-L1951" + "specLocation": "inference/_types/CommonTypes.ts#L1953-L1957" }, { "kind": "request", @@ -180429,6 +180441,24 @@ "summary": "A text embedding task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, + "PutGoogleVertexAiRequestExample10": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_mistral_chat_completion", + "summary": "A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample11": { + "description": "Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_ai21_completion", + "summary": "A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample12": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_ai21_chat_completion", + "summary": "A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"ai21\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" + }, "PutGoogleVertexAiRequestExample2": { "alternatives": [ { @@ -180458,16 +180488,46 @@ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" }, "PutGoogleVertexAiRequestExample3": { - "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", - "summary": "A completion task for Google Model Garden Anthropic endpoint", + "summary": "A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" }, "PutGoogleVertexAiRequestExample4": { - "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided.", "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", - "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample5": { + "description": "Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_meta_completion", + "summary": "A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample6": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_meta_chat_completion", + "summary": "A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"meta\",\n \"model_id\": \"meta/llama-3.3-70b-instruct-maas\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/openapi/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample7": { + "description": "Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_hugging_face_completion", + "summary": "A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample8": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided.", + "method_request": "PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion", + "summary": "A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"hugging_face\",\n \"service_account_json\": \"service-account-json\",\n \"streaming_url\": \"https://url/chat/completions\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample9": { + "description": "Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks.", + "method_request": "PUT _inference/completion/google_model_garden_mistral_completion", + "summary": "A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"mistral\",\n \"model_id\": \"mistral-small-2503\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://url:streamRawPredict\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index d05f3df2d3..e135da88d6 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14251,7 +14251,7 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' -export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' | 'meta' | 'hugging_face' | 'mistral' | 'ai21' export interface InferenceGoogleVertexAIServiceSettings { provider?: InferenceGoogleModelGardenProvider diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 10f45fd4d7..645cc3d275 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1445,6 +1445,7 @@ export class GoogleVertexAIServiceSettings { * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). */ url?: string /** @@ -1453,6 +1454,7 @@ export class GoogleVertexAIServiceSettings { * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face). */ streaming_url?: string /** @@ -1497,7 +1499,11 @@ export class GoogleVertexAIServiceSettings { export enum GoogleModelGardenProvider { google, - anthropic + anthropic, + meta, + hugging_face, + mistral, + ai21 } export class GoogleVertexAITaskSettings { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml new file mode 100644 index 0000000000..ad2ed0183a --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Mistral endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml new file mode 100644 index 0000000000..00a076c727 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden AI21 endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_ai21_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml new file mode 100644 index 0000000000..ceadc43079 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden AI21 endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "ai21", + "service_account_json": "service-account-json", + "streaming_url": "https://url:streamRawPredict" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml index 5cb79753dc..224eec318a 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -1,5 +1,5 @@ -summary: A completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. +summary: A completion task for Google Model Garden Anthropic endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' # type: "request" value: |- diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml index 52b7ececd8..a8ae043de1 100644 --- a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -1,5 +1,5 @@ -summary: A chat_completion task for Google Model Garden Anthropic endpoint -description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +summary: A chat_completion task for Google Model Garden Anthropic endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden with single streaming URL provided. method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' # type: "request" value: |- @@ -8,7 +8,6 @@ value: |- "service_settings": { "provider": "anthropic", "service_account_json": "service-account-json", - "url": "https://url:rawPredict", "streaming_url": "https://streaming_url:streamRawPredict" }, "task_settings": { diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml new file mode 100644 index 0000000000..19f4ce4de5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml @@ -0,0 +1,14 @@ +summary: A completion task for Google Model Garden Meta endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_meta_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml new file mode 100644 index 0000000000..8d38a23ca5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample6.yaml @@ -0,0 +1,14 @@ +summary: A chat_completion task for Google Model Garden Meta endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "meta", + "model_id": "meta/llama-3.3-70b-instruct-maas", + "service_account_json": "service-account-json", + "streaming_url": "https://url/openapi/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml new file mode 100644 index 0000000000..e503c99756 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample7.yaml @@ -0,0 +1,13 @@ +summary: A completion task for Google Model Garden Hugging Face endpoint with single URL provided for both streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml new file mode 100644 index 0000000000..59c9c14010 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample8.yaml @@ -0,0 +1,13 @@ +summary: A chat_completion task for Google Model Garden Hugging Face endpoint with single streaming URL provided +description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden with single streaming URL provided. +method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "hugging_face", + "service_account_json": "service-account-json", + "streaming_url": "https://url/chat/completions" + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml new file mode 100644 index 0000000000..17c69126b5 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample9.yaml @@ -0,0 +1,15 @@ +summary: A completion task for Google Model Garden Mistral endpoint with separate URLs for streaming and non-streaming tasks +description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. +method_request: 'PUT _inference/completion/google_model_garden_mistral_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "mistral", + "model_id": "mistral-small-2503", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://url:streamRawPredict" + } + }