diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index cab9da60c1..ef4f3cab76 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22749,6 +22749,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -102895,22 +102905,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -102934,12 +102960,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -102961,6 +102991,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -103004,6 +103041,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index eb435a3aec..a9b15afef9 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13731,6 +13731,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -66995,22 +67005,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -67034,12 +67060,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -67061,6 +67091,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -67104,6 +67141,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/schema/schema.json b/output/schema/schema.json index b468065687..bdaa2335ac 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172569,6 +172569,22 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1399-L1402" }, + { + "kind": "enum", + "members": [ + { + "name": "google" + }, + { + "name": "anthropic" + } + ], + "name": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1473-L1476" + }, { "kind": "interface", "name": { @@ -172577,11 +172593,47 @@ }, "properties": [ { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "name": "url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "name": "streaming_url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "extDocId": "googlevertexai-locations", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", "name": "location", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172591,11 +172643,11 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "extDocId": "googlevertexai-models", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", "name": "model_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172605,9 +172657,9 @@ } }, { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is 
ignored.", "name": "project_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172655,7 +172707,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1408-L1441" + "specLocation": "inference/_types/CommonTypes.ts#L1408-L1471" }, { "kind": "enum", @@ -172668,7 +172720,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1474-L1476" + "specLocation": "inference/_types/CommonTypes.ts#L1517-L1519" }, { "kind": "interface", @@ -172714,9 +172766,23 @@ "namespace": "inference._types" } } + }, + { + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "extDocId": "anthropic-max-tokens", + "extDocUrl": "https://docs.claude.com/en/api/messages#body-max-tokens", + "name": "max_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1443-L1458" + "specLocation": "inference/_types/CommonTypes.ts#L1478-L1501" }, { "kind": "enum", @@ -172738,7 +172804,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1467-L1472" + "specLocation": "inference/_types/CommonTypes.ts#L1510-L1515" }, { "kind": "interface", @@ -172800,7 +172866,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1478-L1510" + "specLocation": "inference/_types/CommonTypes.ts#L1521-L1553" }, { "kind": "enum", @@ -172813,7 +172879,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1531-L1533" + "specLocation": "inference/_types/CommonTypes.ts#L1574-L1576" }, { "kind": "interface", @@ -172847,7 +172913,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1512-L1522" + "specLocation": "inference/_types/CommonTypes.ts#L1555-L1565" }, { "kind": "enum", @@ -172869,7 +172935,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1524-L1529" + "specLocation": "inference/_types/CommonTypes.ts#L1567-L1572" }, { "kind": "interface", @@ -174101,7 +174167,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1535-L1564" + "specLocation": "inference/_types/CommonTypes.ts#L1578-L1607" }, { "kind": "enum", @@ -174114,7 +174180,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1594-L1596" + "specLocation": "inference/_types/CommonTypes.ts#L1637-L1639" }, { "kind": "enum", @@ -174133,7 +174199,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1598-L1602" + "specLocation": "inference/_types/CommonTypes.ts#L1641-L1645" }, { "kind": "interface", @@ -174179,7 +174245,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1566-L1587" + "specLocation": "inference/_types/CommonTypes.ts#L1609-L1630" }, { "kind": "enum", @@ -174195,7 +174261,7 @@ "name": "JinaAITaskType", "namespace": 
"inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1589-L1592" + "specLocation": "inference/_types/CommonTypes.ts#L1632-L1635" }, { "kind": "enum", @@ -174217,7 +174283,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1604-L1609" + "specLocation": "inference/_types/CommonTypes.ts#L1647-L1652" }, { "kind": "interface", @@ -174289,7 +174355,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1611-L1641" + "specLocation": "inference/_types/CommonTypes.ts#L1654-L1684" }, { "kind": "enum", @@ -174302,7 +174368,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1649-L1651" + "specLocation": "inference/_types/CommonTypes.ts#L1692-L1694" }, { "kind": "enum", @@ -174321,7 +174387,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1653-L1657" + "specLocation": "inference/_types/CommonTypes.ts#L1696-L1700" }, { "kind": "enum", @@ -174340,7 +174406,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1643-L1647" + "specLocation": "inference/_types/CommonTypes.ts#L1686-L1690" }, { "kind": "interface", @@ -174498,7 +174564,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1659-L1686" + "specLocation": "inference/_types/CommonTypes.ts#L1702-L1729" }, { "kind": "enum", @@ -174511,7 +174577,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1694-L1696" + "specLocation": "inference/_types/CommonTypes.ts#L1737-L1739" }, { "kind": "enum", @@ -174530,7 +174596,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1688-L1692" + "specLocation": "inference/_types/CommonTypes.ts#L1731-L1735" }, { "kind": "interface", @@ -174617,7 +174683,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1698-L1740" + "specLocation": "inference/_types/CommonTypes.ts#L1741-L1783" }, { "kind": "enum", @@ -174630,7 +174696,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1767-L1769" + "specLocation": "inference/_types/CommonTypes.ts#L1810-L1812" }, { "kind": "interface", @@ -174660,7 +174726,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1742-L1759" + "specLocation": "inference/_types/CommonTypes.ts#L1785-L1802" }, { "kind": "enum", @@ -174679,7 +174745,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1761-L1765" + "specLocation": "inference/_types/CommonTypes.ts#L1804-L1808" }, { "kind": "interface", @@ -175256,6 +175322,12 @@ { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" }, @@ -175267,7 +175339,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L116" + "specLocation": "inference/_types/TaskType.ts#L113-L118" }, { "kind": "enum", @@ -175289,7 +175361,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L118-L123" + "specLocation": "inference/_types/TaskType.ts#L120-L125" }, { "kind": "enum", @@ -175324,7 +175396,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/TaskType.ts#L125-L129" + "specLocation": "inference/_types/TaskType.ts#L127-L131" }, { "kind": "enum", @@ -175343,7 +175415,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L133-L137" }, { "kind": "enum", @@ -175362,7 +175434,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L139-L143" }, { "kind": "enum", @@ -175378,7 +175450,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L146" + "specLocation": "inference/_types/TaskType.ts#L145-L148" }, { "kind": "enum", @@ -175397,7 +175469,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L148-L152" + "specLocation": "inference/_types/TaskType.ts#L150-L154" }, { "kind": "interface", @@ -175519,7 +175591,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1460-L1465" + "specLocation": "inference/_types/CommonTypes.ts#L1503-L1508" }, { "kind": "interface", @@ -175665,7 +175737,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1771-L1802" + "specLocation": "inference/_types/CommonTypes.ts#L1814-L1845" }, { "kind": "enum", @@ -175678,7 +175750,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1835-L1837" + "specLocation": "inference/_types/CommonTypes.ts#L1878-L1880" }, { "kind": "interface", @@ -175738,7 +175810,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1804-L1828" + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1871" }, { "kind": "enum", @@ -175754,7 +175826,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1830-L1833" + "specLocation": "inference/_types/CommonTypes.ts#L1873-L1876" }, { "kind": "interface", @@ -175842,7 +175914,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1839-L1877" + "specLocation": "inference/_types/CommonTypes.ts#L1882-L1920" }, { "kind": "enum", @@ -175855,7 +175927,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1885-L1887" + "specLocation": "inference/_types/CommonTypes.ts#L1928-L1930" }, { "kind": "enum", @@ -175874,7 +175946,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1879-L1883" + "specLocation": "inference/_types/CommonTypes.ts#L1922-L1926" }, { "kind": "request", @@ -179635,6 +179707,18 @@ "method_request": "PUT _inference/rerank/google_vertex_ai_rerank", "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n 
\"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 4cffca0045..a071062c75 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14194,10 +14194,15 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' + export interface InferenceGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string + provider?: InferenceGoogleModelGardenProvider + url?: string + streaming_url?: string + location?: string + model_id?: string + project_id?: string rate_limit?: InferenceRateLimitSetting service_account_json: string dimensions?: integer @@ -14209,6 +14214,7 @@ export interface InferenceGoogleVertexAITaskSettings { auto_truncate?: boolean top_n?: integer thinking_config?: InferenceThinkingConfig + max_tokens?: integer } export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' @@ -14511,7 +14517,7 @@ export type InferenceTaskTypeElasticsearch = 'sparse_embedding' | 'text_embeddin export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' -export type InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' +export type InferenceTaskTypeGoogleVertexAI = 'chat_completion' | 'completion' | 'text_embedding' | 'rerank' export type InferenceTaskTypeHuggingFace = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 2cf039c60e..bca1d6e6ba 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -22,6 +22,7 @@ analysis,https://www.elastic.co/docs/manage-data/data-store/text-analysis,, analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/repo-analysis-api.html, analyzer-anatomy,https://www.elastic.co/docs/manage-data/data-store/text-analysis/anatomy-of-an-analyzer,, analyzer-update-existing,https://www.elastic.co/docs/manage-data/data-store/text-analysis/specify-an-analyzer#update-analyzers-on-existing-indices,, +anthropic-max-tokens,https://docs.claude.com/en/api/messages#body-max-tokens,, anthropic-messages,https://docs.anthropic.com/en/api/messages,, 
anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names,, api-date-math-index-names,https://www.elastic.co/docs/reference/elasticsearch/rest-apis/api-conventions#api-date-math-index-names,, diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 1bc2c3ae66..1898a342dc 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1407,21 +1407,51 @@ export enum GoogleAiServiceType { export class GoogleVertexAIServiceSettings { /** - * The name of the location to use for the inference task. + * The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks. + * In order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`. + * Modes: + * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`. + * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters. + */ + provider?: GoogleModelGardenProvider + /** + * The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint. + * If both `url` and `streaming_url` are provided, each is used for its respective mode. + * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). + * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + */ + url?: string + /** + * The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint. + * If both `streaming_url` and `url` are provided, each is used for its respective mode. + * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). + * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + */ + streaming_url?: string + /** + * The name of the location to use for the inference task for the Google Vertex AI inference task. + * For Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored. * Refer to the Google documentation for the list of supported locations. * @ext_doc_id googlevertexai-locations */ - location: string + location?: string /** * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. + * For Google Vertex AI `model_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored. + * Refer to the Google documentation for the list of supported models for Google Vertex AI. * @ext_doc_id googlevertexai-models */ - model_id: string + model_id?: string /** - * The name of the project to use for the inference task. + * The name of the project to use for the Google Vertex AI inference task. 
+ * For Google Vertex AI `project_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored. */ - project_id: string + project_id?: string /** * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000. @@ -1440,6 +1470,11 @@ export class GoogleVertexAIServiceSettings { dimensions?: integer } +export enum GoogleModelGardenProvider { + google, + anthropic +} + export class GoogleVertexAITaskSettings { /** * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. @@ -1455,6 +1490,14 @@ export class GoogleVertexAITaskSettings { * @ext_doc_id googlevertexai-thinking */ thinking_config?: ThinkingConfig + /** + * For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider. + * If `provider` is not set to `anthropic`, this field is ignored. + * If `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used. + * Anthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information. + * @ext_doc_id anthropic-max-tokens + */ + max_tokens?: integer } export class ThinkingConfig { diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index e0e5882eb3..c5d56e439f 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -111,6 +111,8 @@ export enum TaskTypeGoogleAIStudio { } export enum TaskTypeGoogleVertexAI { + chat_completion, + completion, text_embedding, rerank } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml new file mode 100644 index 0000000000..5cb79753dc --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -0,0 +1,17 @@ +summary: A completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden. 
+method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml new file mode 100644 index 0000000000..52b7ececd8 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -0,0 +1,17 @@ +summary: A chat_completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + }
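
Reviewer note: the following is a minimal TypeScript sketch of the mode- and URL-resolution rules that the new `provider`, `url`, and `streaming_url` descriptions specify. It is not part of this PR and not the actual Elasticsearch implementation; the field names mirror `GoogleVertexAIServiceSettings`, but the helper and its error messages are hypothetical, and the Vertex AI URL construction is left as a placeholder because its exact format is not defined in this diff.

```ts
// Hypothetical illustration of the documented resolution rules, not the real implementation.
type GoogleModelGardenProvider = 'google' | 'anthropic'

interface GoogleVertexAIServiceSettings {
  provider?: GoogleModelGardenProvider
  url?: string
  streaming_url?: string
  location?: string
  model_id?: string
  project_id?: string
  service_account_json: string
}

function resolveEndpoint(s: GoogleVertexAIServiceSettings, streaming: boolean): string {
  // Google Model Garden mode: `provider` is set and is not `google`.
  const modelGarden = s.provider !== undefined && s.provider !== 'google'
  if (modelGarden) {
    // At least one of `url` or `streaming_url` is required; each falls back to the other.
    const resolved = streaming ? (s.streaming_url ?? s.url) : (s.url ?? s.streaming_url)
    if (resolved === undefined) {
      throw new Error('Model Garden mode requires `url` and/or `streaming_url`')
    }
    return resolved
  }
  // Google Vertex AI mode: `url`/`streaming_url` must not be set; the endpoint URL is
  // built from `location`, `model_id`, and `project_id`, which are mandatory here.
  if (s.url !== undefined || s.streaming_url !== undefined) {
    throw new Error('`url`/`streaming_url` are only valid for Google Model Garden providers')
  }
  if (s.location === undefined || s.model_id === undefined || s.project_id === undefined) {
    throw new Error('Vertex AI mode requires `location`, `model_id`, and `project_id`')
  }
  // Placeholder only; the concrete Vertex AI URL format is not specified in this diff.
  return `<vertex-ai-url built from ${s.location}/${s.project_id}/${s.model_id}>`
}
```

For example, with the settings from `PutGoogleVertexAiRequestExample3`, `resolveEndpoint(settings, false)` would return the `url` value and `resolveEndpoint(settings, true)` the `streaming_url` value; omitting `streaming_url` would make both calls return `url`, matching the fallback described in the field documentation.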
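
Likewise, a small sketch of the documented `max_tokens` task-setting behaviour for the Google Model Garden `anthropic` provider (ignored for other providers, positive integer required, default 1024 per the description above). The function name is illustrative only.

```ts
// Hypothetical helper showing the documented `max_tokens` semantics; not part of this PR.
function effectiveMaxTokens(
  provider: 'google' | 'anthropic' | undefined,
  maxTokens?: number
): number | undefined {
  if (provider !== 'anthropic') return undefined // field is ignored for other providers
  if (maxTokens === undefined) return 1024       // documented default
  if (!Number.isInteger(maxTokens) || maxTokens <= 0) {
    throw new Error('`max_tokens` must be a positive integer')
  }
  return maxTokens
}
```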