diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 4710103659..b2a8841fc1 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -22729,6 +22729,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -102887,22 +102897,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -102926,12 +102952,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -102953,6 +102983,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -102996,6 +103033,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 7a28c89d20..cd91f00adb 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -13713,6 +13713,16 @@ "summary": "A rerank task", "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } } } @@ -66977,22 +66987,38 @@ "inference._types.GoogleVertexAIServiceSettings": { "type": "object", "properties": { + "provider": { + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.GoogleModelGardenProvider" + } + ] + }, + "url": { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, + "streaming_url": { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "type": "string" + }, "location": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" }, - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "type": "string" }, "model_id": { "externalDocs": { "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" }, - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "type": "string" }, "project_id": { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is ignored.", "type": "string" }, "rate_limit": { @@ -67016,12 +67042,16 @@ } }, "required": [ - "location", - "model_id", - "project_id", "service_account_json" ] }, + "inference._types.GoogleModelGardenProvider": { + "type": "string", + "enum": [ + "google", + "anthropic" + ] + }, "inference._types.GoogleVertexAITaskSettings": { "type": "object", "properties": { @@ -67043,6 +67073,13 @@ "$ref": 
"#/components/schemas/inference._types.ThinkingConfig" } ] + }, + "max_tokens": { + "externalDocs": { + "url": "https://docs.claude.com/en/api/messages#body-max-tokens" + }, + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "type": "number" } } }, @@ -67086,6 +67123,8 @@ "inference._types.TaskTypeGoogleVertexAI": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding", "rerank" ] diff --git a/output/schema/schema.json b/output/schema/schema.json index 7937dfaa3d..aed0da0ed0 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -172714,6 +172714,22 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1383-L1386" }, + { + "kind": "enum", + "members": [ + { + "name": "google" + }, + { + "name": "anthropic" + } + ], + "name": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1457-L1460" + }, { "kind": "interface", "name": { @@ -172722,11 +172738,47 @@ }, "properties": [ { - "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "description": "The name of the Google Model Garden Provider for `completion` and `chat_completion` tasks.\nIn order for a Google Model Garden endpoint to be used `provider` must be defined and be other than `google`.\nModes:\n- Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`.\n- Google Vertex AI: omit `provider` or set it to `google`. 
In this mode, do not set `url` or `streaming_url` and Elastic will construct the endpoint url from `location`, `model_id`, and `project_id` parameters.", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleModelGardenProvider", + "namespace": "inference._types" + } + } + }, + { + "description": "The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint.\nIf both `url` and `streaming_url` are provided, each is used for its respective mode.\nIf `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).\nAt least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.", + "name": "url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint.\nIf both `streaming_url` and `url` are provided, each is used for its respective mode.\nIf `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.\nIf `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).\nAt least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.", + "name": "streaming_url", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the location to use for the inference task for the Google Vertex AI inference task.\nFor Google Vertex AI, when `provider` is omitted or `google` `location` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `location` is ignored.\nRefer to the Google documentation for the list of supported locations.", "extDocId": "googlevertexai-locations", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", "name": "location", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172736,11 +172788,11 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "description": "The name of the model to use for the inference task.\nFor Google Vertex AI `model_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `model_id` will be used for some providers that require it, otherwise - ignored.\nRefer to the Google documentation for the list of supported models for Google Vertex AI.", "extDocId": "googlevertexai-models", "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", "name": "model_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172750,9 +172802,9 @@ } }, { - "description": "The name of the project to use for the inference task.", + "description": "The name of the project to use for the Google Vertex AI inference task.\nFor Google Vertex AI `project_id` is mandatory.\nFor Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value - `project_id` is 
ignored.", "name": "project_id", - "required": true, + "required": false, "type": { "kind": "instance_of", "type": { @@ -172800,7 +172852,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1392-L1425" + "specLocation": "inference/_types/CommonTypes.ts#L1392-L1455" }, { "kind": "enum", @@ -172813,7 +172865,7 @@ "name": "GoogleVertexAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1458-L1460" + "specLocation": "inference/_types/CommonTypes.ts#L1501-L1503" }, { "kind": "interface", @@ -172859,9 +172911,23 @@ "namespace": "inference._types" } } + }, + { + "description": "For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider.\nIf `provider` is not set to `anthropic`, this field is ignored.\nIf `max_tokens` is specified - it must be a positive integer. If not specified, the default value of 1024 is used.\nAnthropic models require `max_tokens` to be set for each request. Please refer to the Anthropic documentation for more information.", + "extDocId": "anthropic-max-tokens", + "extDocUrl": "https://docs.claude.com/en/api/messages#body-max-tokens", + "name": "max_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1427-L1442" + "specLocation": "inference/_types/CommonTypes.ts#L1462-L1485" }, { "kind": "enum", @@ -172883,7 +172949,7 @@ "name": "GoogleVertexAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1451-L1456" + "specLocation": "inference/_types/CommonTypes.ts#L1494-L1499" }, { "kind": "interface", @@ -172945,7 +173011,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1462-L1494" + "specLocation": "inference/_types/CommonTypes.ts#L1505-L1537" }, { "kind": "enum", @@ -172958,7 +173024,7 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1515-L1517" + "specLocation": "inference/_types/CommonTypes.ts#L1558-L1560" }, { "kind": "interface", @@ -172992,7 +173058,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1496-L1506" + "specLocation": "inference/_types/CommonTypes.ts#L1539-L1549" }, { "kind": "enum", @@ -173014,7 +173080,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1508-L1513" + "specLocation": "inference/_types/CommonTypes.ts#L1551-L1556" }, { "kind": "interface", @@ -174246,7 +174312,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1519-L1548" + "specLocation": "inference/_types/CommonTypes.ts#L1562-L1591" }, { "kind": "enum", @@ -174259,7 +174325,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1578-L1580" + "specLocation": "inference/_types/CommonTypes.ts#L1621-L1623" }, { "kind": "enum", @@ -174278,7 +174344,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1582-L1586" + "specLocation": "inference/_types/CommonTypes.ts#L1625-L1629" }, { "kind": "interface", @@ -174324,7 +174390,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1550-L1571" + "specLocation": "inference/_types/CommonTypes.ts#L1593-L1614" }, { "kind": "enum", @@ -174340,7 +174406,7 @@ "name": "JinaAITaskType", "namespace": 
"inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1573-L1576" + "specLocation": "inference/_types/CommonTypes.ts#L1616-L1619" }, { "kind": "enum", @@ -174362,7 +174428,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1588-L1593" + "specLocation": "inference/_types/CommonTypes.ts#L1631-L1636" }, { "kind": "interface", @@ -174434,7 +174500,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1595-L1625" + "specLocation": "inference/_types/CommonTypes.ts#L1638-L1668" }, { "kind": "enum", @@ -174447,7 +174513,7 @@ "name": "LlamaServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1633-L1635" + "specLocation": "inference/_types/CommonTypes.ts#L1676-L1678" }, { "kind": "enum", @@ -174466,7 +174532,7 @@ "name": "LlamaSimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1637-L1641" + "specLocation": "inference/_types/CommonTypes.ts#L1680-L1684" }, { "kind": "enum", @@ -174485,7 +174551,7 @@ "name": "LlamaTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1627-L1631" + "specLocation": "inference/_types/CommonTypes.ts#L1670-L1674" }, { "kind": "interface", @@ -174643,7 +174709,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1643-L1670" + "specLocation": "inference/_types/CommonTypes.ts#L1686-L1713" }, { "kind": "enum", @@ -174656,7 +174722,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1678-L1680" + "specLocation": "inference/_types/CommonTypes.ts#L1721-L1723" }, { "kind": "enum", @@ -174675,7 +174741,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676" + "specLocation": "inference/_types/CommonTypes.ts#L1715-L1719" }, { "kind": "interface", @@ -174762,7 +174828,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1682-L1724" + "specLocation": "inference/_types/CommonTypes.ts#L1725-L1767" }, { "kind": "enum", @@ -174775,7 +174841,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1740-L1742" + "specLocation": "inference/_types/CommonTypes.ts#L1783-L1785" }, { "kind": "interface", @@ -174797,7 +174863,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1726-L1732" + "specLocation": "inference/_types/CommonTypes.ts#L1769-L1775" }, { "kind": "enum", @@ -174816,7 +174882,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1734-L1738" + "specLocation": "inference/_types/CommonTypes.ts#L1777-L1781" }, { "kind": "interface", @@ -175393,6 +175459,12 @@ { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" }, @@ -175404,7 +175476,7 @@ "name": "TaskTypeGoogleVertexAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L113-L116" + "specLocation": "inference/_types/TaskType.ts#L113-L118" }, { "kind": "enum", @@ -175426,7 +175498,7 @@ "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L118-L123" + "specLocation": "inference/_types/TaskType.ts#L120-L125" }, { "kind": "enum", @@ -175461,7 +175533,7 @@ "name": "TaskTypeLlama", "namespace": "inference._types" }, - 
"specLocation": "inference/_types/TaskType.ts#L125-L129" + "specLocation": "inference/_types/TaskType.ts#L127-L131" }, { "kind": "enum", @@ -175480,7 +175552,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L131-L135" + "specLocation": "inference/_types/TaskType.ts#L133-L137" }, { "kind": "enum", @@ -175499,7 +175571,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L137-L141" + "specLocation": "inference/_types/TaskType.ts#L139-L143" }, { "kind": "enum", @@ -175515,7 +175587,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L143-L146" + "specLocation": "inference/_types/TaskType.ts#L145-L148" }, { "kind": "enum", @@ -175534,7 +175606,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L148-L152" + "specLocation": "inference/_types/TaskType.ts#L150-L154" }, { "kind": "interface", @@ -175656,7 +175728,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1444-L1449" + "specLocation": "inference/_types/CommonTypes.ts#L1487-L1492" }, { "kind": "interface", @@ -175802,7 +175874,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1744-L1775" + "specLocation": "inference/_types/CommonTypes.ts#L1787-L1818" }, { "kind": "enum", @@ -175815,7 +175887,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1808-L1810" + "specLocation": "inference/_types/CommonTypes.ts#L1851-L1853" }, { "kind": "interface", @@ -175875,7 +175947,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1777-L1801" + "specLocation": "inference/_types/CommonTypes.ts#L1820-L1844" }, { "kind": "enum", @@ -175891,7 +175963,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1803-L1806" + "specLocation": "inference/_types/CommonTypes.ts#L1846-L1849" }, { "kind": "interface", @@ -175979,7 +176051,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1812-L1850" + "specLocation": "inference/_types/CommonTypes.ts#L1855-L1893" }, { "kind": "enum", @@ -175992,7 +176064,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1858-L1860" + "specLocation": "inference/_types/CommonTypes.ts#L1901-L1903" }, { "kind": "enum", @@ -176011,7 +176083,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1852-L1856" + "specLocation": "inference/_types/CommonTypes.ts#L1895-L1899" }, { "kind": "request", @@ -179772,6 +179844,18 @@ "method_request": "PUT _inference/rerank/google_vertex_ai_rerank", "summary": "A rerank task", "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample3": { + "description": "Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/completion/google_model_garden_anthropic_completion", + "summary": "A completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n 
\"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" + }, + "PutGoogleVertexAiRequestExample4": { + "description": "Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.", + "method_request": "PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion", + "summary": "A chat_completion task for Google Model Garden Anthropic endpoint", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"provider\": \"anthropic\",\n \"service_account_json\": \"service-account-json\",\n \"url\": \"https://url:rawPredict\",\n \"streaming_url\": \"https://streaming_url:streamRawPredict\"\n },\n \"task_settings\": {\n \"max_tokens\": 128\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e446eb2a0e..68a7cec0e2 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14209,10 +14209,15 @@ export interface InferenceGoogleAiStudioServiceSettings { export type InferenceGoogleAiStudioTaskType = 'completion' | 'text_embedding' +export type InferenceGoogleModelGardenProvider = 'google' | 'anthropic' + export interface InferenceGoogleVertexAIServiceSettings { - location: string - model_id: string - project_id: string + provider?: InferenceGoogleModelGardenProvider + url?: string + streaming_url?: string + location?: string + model_id?: string + project_id?: string rate_limit?: InferenceRateLimitSetting service_account_json: string dimensions?: integer @@ -14224,6 +14229,7 @@ export interface InferenceGoogleVertexAITaskSettings { auto_truncate?: boolean top_n?: integer thinking_config?: InferenceThinkingConfig + max_tokens?: integer } export type InferenceGoogleVertexAITaskType = 'rerank' | 'text_embedding' | 'completion' | 'chat_completion' @@ -14525,7 +14531,7 @@ export type InferenceTaskTypeElasticsearch = 'sparse_embedding' | 'text_embeddin export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' -export type InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' +export type InferenceTaskTypeGoogleVertexAI = 'chat_completion' | 'completion' | 'text_embedding' | 'rerank' export type InferenceTaskTypeHuggingFace = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 62ae137ab3..d565b82cf5 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -22,6 +22,7 @@ analysis,https://www.elastic.co/docs/manage-data/data-store/text-analysis,, analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/repo-analysis-api.html, analyzer-anatomy,https://www.elastic.co/docs/manage-data/data-store/text-analysis/anatomy-of-an-analyzer,, analyzer-update-existing,https://www.elastic.co/docs/manage-data/data-store/text-analysis/specify-an-analyzer#update-analyzers-on-existing-indices,, +anthropic-max-tokens,https://docs.claude.com/en/api/messages#body-max-tokens,, anthropic-messages,https://docs.anthropic.com/en/api/messages,, 
anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names,, api-date-math-index-names,https://www.elastic.co/docs/reference/elasticsearch/rest-apis/api-conventions#api-date-math-index-names,, diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 25d1be939a..f725891fab 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1391,21 +1391,51 @@ export enum GoogleAiServiceType { export class GoogleVertexAIServiceSettings { /** - * The name of the location to use for the inference task. + * The name of the Google Model Garden provider for `completion` and `chat_completion` tasks. + * To use a Google Model Garden endpoint, `provider` must be defined and set to a value other than `google`. + * Modes: + * - Google Model Garden (third-party models): set `provider` to a supported non-`google` value and provide `url` and/or `streaming_url`. + * - Google Vertex AI: omit `provider` or set it to `google`. In this mode, do not set `url` or `streaming_url`; Elastic constructs the endpoint URL from the `location`, `model_id`, and `project_id` parameters. + */ + provider?: GoogleModelGardenProvider + /** + * The URL for non-streaming `completion` requests to a Google Model Garden provider endpoint. + * If both `url` and `streaming_url` are provided, each is used for its respective mode. + * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`). + * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage. + */ + url?: string + /** + * The URL for streaming `completion` and `chat_completion` requests to a Google Model Garden provider endpoint. + * If both `streaming_url` and `url` are provided, each is used for its respective mode. + * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests. + * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`). + * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage. + */ + streaming_url?: string + /** + * The name of the location to use for the Google Vertex AI inference task. + * For Google Vertex AI, `location` is mandatory when `provider` is omitted or set to `google`. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value, `location` is ignored. * Refer to the Google documentation for the list of supported locations. * @ext_doc_id googlevertexai-locations */ - location: string + location?: string /** * The name of the model to use for the inference task. - * Refer to the Google documentation for the list of supported models. + * For Google Vertex AI, `model_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value, `model_id` is used only by providers that require it and is otherwise ignored. + * Refer to the Google documentation for the list of supported models for Google Vertex AI. * @ext_doc_id googlevertexai-models */ - model_id: string + model_id?: string /** - * The name of the project to use for the inference task. + * The name of the project to use for the Google Vertex AI inference task.
+ * For Google Vertex AI, `project_id` is mandatory. + * For Google Model Garden's `completion` and `chat_completion` tasks, when `provider` is a supported non-`google` value, `project_id` is ignored. */ - project_id: string + project_id?: string /** * This setting helps to minimize the number of rate limit errors returned from Google Vertex AI. * By default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000. @@ -1424,6 +1454,11 @@ export class GoogleVertexAIServiceSettings { dimensions?: integer } +export enum GoogleModelGardenProvider { + google, + anthropic +} + export class GoogleVertexAITaskSettings { /** * For a `text_embedding` task, truncate inputs longer than the maximum token length automatically. @@ -1439,6 +1474,14 @@ * @ext_doc_id googlevertexai-thinking */ thinking_config?: ThinkingConfig + /** + * For `completion` and `chat_completion` tasks, specifies the `max_tokens` value for requests sent to the Google Model Garden `anthropic` provider. + * If `provider` is not set to `anthropic`, this field is ignored. + * If `max_tokens` is specified, it must be a positive integer; if not specified, the default value of 1024 is used. + * Anthropic models require `max_tokens` to be set for each request. Refer to the Anthropic documentation for more information. + * @ext_doc_id anthropic-max-tokens + */ + max_tokens?: integer } export class ThinkingConfig { diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index e0e5882eb3..c5d56e439f 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -111,6 +111,8 @@ export enum TaskTypeGoogleAIStudio { } export enum TaskTypeGoogleVertexAI { + chat_completion, + completion, text_embedding, rerank } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml new file mode 100644 index 0000000000..5cb79753dc --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml @@ -0,0 +1,17 @@ +summary: A completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.
+method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + } diff --git a/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml new file mode 100644 index 0000000000..52b7ececd8 --- /dev/null +++ b/specification/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml @@ -0,0 +1,17 @@ +summary: A chat_completion task for Google Model Garden Anthropic endpoint +description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden. +method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion' +# type: "request" +value: |- + { + "service": "googlevertexai", + "service_settings": { + "provider": "anthropic", + "service_account_json": "service-account-json", + "url": "https://url:rawPredict", + "streaming_url": "https://streaming_url:streamRawPredict" + }, + "task_settings": { + "max_tokens": 128 + } + }
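The two new YAML examples above cover endpoint creation only. As a companion sketch for reviewers, the endpoints they create would be exercised through the existing perform-inference and chat-completion APIs; the request bodies below are assumptions based on those general APIs (the `input` and `messages` shapes are not introduced by this change) and reuse the endpoint IDs from the examples:

POST _inference/completion/google_model_garden_anthropic_completion
{
  "input": "What is Elastic?"
}

POST _inference/chat_completion/google_model_garden_anthropic_chat_completion/_stream
{
  "messages": [
    { "role": "user", "content": "What is Elastic?" }
  ]
}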