diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 9b3accb5c6..1a480429be 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4598,15 +4598,6 @@ "visibility": "public" }, "stack": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= "since": "8.16.0", "stability": "stable", "visibility": "public" @@ -4652,7 +4643,6 @@ "visibility": "public" }, "stack": { ->>>>>>> b052219ca (Update doc_id URLs for inference APIs (#4127)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -4698,31 +4688,15 @@ "visibility": "public" }, "stack": { ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "since": "8.16.0", "stability": "stable", "visibility": "public" } }, -<<<<<<< HEAD -<<<<<<< HEAD - "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-alibabacloud", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html", - "name": "inference.put_alibabacloud", -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-anthropic", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic", "name": "inference.put_anthropic", -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "privileges": { "cluster": [ "manage_inference" @@ -4730,15 +4704,7 @@ }, "request": { "name": "Request", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= - "namespace": "inference.put_anthropic" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "namespace": "inference.put_anthropic" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4746,15 +4712,7 @@ ], "response": { "name": "Response", -<<<<<<< HEAD -<<<<<<< 
HEAD - "namespace": "inference.put_alibabacloud" -======= - "namespace": "inference.put_anthropic" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "namespace": "inference.put_anthropic" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) }, "responseMediaType": [ "application/json" @@ -4764,15 +4722,7 @@ "methods": [ "PUT" ], -<<<<<<< HEAD -<<<<<<< HEAD - "path": "/_inference/{task_type}/{alibabacloud_inference_id}" -======= "path": "/_inference/{task_type}/{anthropic_inference_id}" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "path": "/_inference/{task_type}/{anthropic_inference_id}" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) } ] }, @@ -4783,45 +4733,15 @@ "visibility": "public" }, "stack": { -<<<<<<< HEAD -<<<<<<< HEAD "since": "8.14.0", -======= - "since": "8.13.0", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "since": "8.13.0", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "since": "8.13.0", ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "stability": "stable", "visibility": "public" } }, -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-azureaistudio", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio", "name": "inference.put_azureaistudio", -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) - "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-cohere", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html", - "name": "inference.put_cohere", -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "privileges": { "cluster": [ "manage_inference" @@ -4829,19 +4749,7 @@ }, "request": { "name": "Request", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD 
"namespace": "inference.put_azureaistudio" -======= - "namespace": "inference.put_cohere" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_cohere" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "namespace": "inference.put_cohere" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4849,19 +4757,7 @@ ], "response": { "name": "Response", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "namespace": "inference.put_azureaistudio" -======= - "namespace": "inference.put_cohere" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_cohere" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "namespace": "inference.put_cohere" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, "responseMediaType": [ "application/json" @@ -4871,19 +4767,7 @@ "methods": [ "PUT" ], -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "path": "/_inference/{task_type}/{azureaistudio_inference_id}" -======= - "path": "/_inference/{task_type}/{cohere_inference_id}" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "path": "/_inference/{task_type}/{cohere_inference_id}" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "path": "/_inference/{task_type}/{cohere_inference_id}" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) } ] }, @@ -4894,31 +4778,15 @@ "visibility": "public" }, "stack": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "since": "8.14.0", -======= - "since": "8.12.0", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "since": "8.12.0", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "stability": "stable", "visibility": "public" } }, -<<<<<<< HEAD -<<<<<<< HEAD "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-azureopenai", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai", "name": "inference.put_azureopenai", -<<<<<<< HEAD -======= -======= ->>>>>>> 76ab18016 (Add 
Anthropic inference API details (#4023)) -======= "privileges": { "cluster": [ "manage_inference" @@ -5000,85 +4868,6 @@ "visibility": "public" }, "stack": { - "since": "8.12.0", - "stability": "stable", - "visibility": "public" - } - }, ->>>>>>> b052219ca (Update doc_id URLs for inference APIs (#4127)) - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", - "docId": "inference-api-put-eis", - "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis", - "name": "inference.put_eis", -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "privileges": { - "cluster": [ - "manage_inference" - ] - }, - "request": { - "name": "Request", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureopenai" -======= - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureopenai" -======= - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "PUT" - ], -<<<<<<< HEAD -<<<<<<< HEAD - "path": "/_inference/{task_type}/{azureopenai_inference_id}" -======= - "path": "/_inference/{task_type}/{eis_inference_id}" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "path": "/_inference/{task_type}/{eis_inference_id}" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - } - ] - }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "since": "8.13.0", "stability": "stable", "visibility": "public" @@ -5211,203 +5000,6 @@ } ] }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) - "since": "8.15.0", - "stability": "stable", - "visibility": "public" - } - }, - "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-googlevertexai", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html", - "name": 
"inference.put_googlevertexai", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, - "request": { - "name": "Request", - "namespace": "inference.put_googlevertexai" - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", - "namespace": "inference.put_googlevertexai" - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "PUT" - ], - "path": "/_inference/{task_type}/{googlevertexai_inference_id}" - } - ] - }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) - "since": "8.12.0", - "stability": "stable", - "visibility": "public" - } - }, -<<<<<<< HEAD -======= - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", - "docId": "inference-api-put-eis", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", - "name": "inference.put_eis", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, - "request": { - "name": "Request", - "namespace": "inference.put_eis" - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", - "namespace": "inference.put_eis" - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "PUT" - ], - "path": "/_inference/{task_type}/{eis_inference_id}" - } - ] - }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { - "since": "8.11.0", - "stability": "stable", - "visibility": "public" - } - }, - "deprecation": { - "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", - "version": "8.16.0" - }, - "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-elser", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-elser.html", - "name": "inference.put_elser", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, - "request": { - "name": "Request", - "namespace": "inference.put_elser" - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", - "namespace": "inference.put_elser" - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "PUT" - ], - "path": "/_inference/{task_type}/{elser_inference_id}" - } - ] - }, - { - "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, - "stack": { - "since": "8.15.0", - "stability": "stable", - "visibility": "public" - } - }, - "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-googleaistudio", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-ai-studio.html", - "name": "inference.put_googleaistudio", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, - "request": { - "name": "Request", - "namespace": "inference.put_googleaistudio" - }, - "requestBodyRequired": false, - "requestMediaType": [ - "application/json" - ], - "response": { - "name": "Response", - "namespace": "inference.put_googleaistudio" - }, - "responseMediaType": [ - "application/json" - ], - "urls": [ - { - "methods": [ - "PUT" - ], - "path": "/_inference/{task_type}/{googleaistudio_inference_id}" - } - ] - }, { "availability": { "serverless": { @@ -5465,7 +5057,6 @@ "visibility": "public" } }, ->>>>>>> 33420c6e0 (Add ELSER inference API details (#4026)) "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* 
`all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", @@ -5596,8 +5187,6 @@ "visibility": "public" }, "stack": { -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -27920,36 +27509,80 @@ "kind": "properties", "properties": [ { -<<<<<<< HEAD -======= -<<<<<<< HEAD -======= - "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_eis" + "namespace": "inference.put_alibabacloud" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `alibabacloud-ai-search` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" } } } ] }, - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAlibabaCloudRequestExample1": { "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", "summary": "A completion task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" }, "PutAlibabaCloudRequestExample2": { "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", "summary": "A rerank task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" }, "PutAlibabaCloudRequestExample3": { "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs a sparse embedding task.", "summary": "A sparse embedding task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" }, "PutAlibabaCloudRequestExample4": { "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", "summary": "A text embedding task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": 
\"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + }, "inherits": { "type": { "name": "RequestBase", @@ -27959,24 +27592,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_eis" + "namespace": "inference.put_alibabacloud" }, "path": [ { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "EisTaskType", - "namespace": "inference.put_eis" + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "eis_inference_id", + "name": "alibabacloud_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27988,7 +27621,7 @@ } ], "query": [], - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" }, { "body": { @@ -28004,9 +27637,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_eis" + "namespace": "inference.put_alibabacloud" }, - "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28030,46 +27663,54 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `elser`.", + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_elser" + "namespace": "inference.put_amazonbedrock" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" } } } ] }, - "deprecation": { - "description": "The elser service is deprecated and will be removed in a future release. 
Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", - "version": "8.16.0" - }, - "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutElserRequestExample1": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. 
The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" }, - "PutElserRequestExample2": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", - "summary": "Adaptive allocations", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" } }, "inherits": { "type": { "name": "RequestBase", "namespace": "_types" } }, "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_elser" + "namespace": "inference.put_amazonbedrock" }, "path": [ { "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "elser_inference_id", + "name": "amazonbedrock_inference_id", "required": true, "type": { "kind": "instance_of", "type": { "name": "Id", "namespace": "_types" } } } ], "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" }, { "body": { @@ -28123,18 +27764,12 @@ } } }, - "examples": { - "PutElserResponseExample1": { - "description": "A successful response when creating an ELSER inference endpoint.", - "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" - } - }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_elser" + "namespace": "inference.put_amazonbedrock" }, - "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28158,37 +27793,48 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", + "description": "The type of service supported for the specified task type. 
In this case, `anthropic`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_anthropic" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `googleaistudio` service.", + "description": "Settings used to install the inference model. These settings are specific to the `anthropic` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" } } } ] }, - "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutGoogleAiStudioRequestExample1": { - "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" } }, "inherits": { "type": { "name": "RequestBase", "namespace": "_types" } }, "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_anthropic" }, "path": [ { - "description": "The type of the inference task that the model will perform.", + "description": "The task 
type.\nThe only valid task type for the model to perform is `completion`.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "googleaistudio_inference_id", + "name": "anthropic_inference_id", "required": true, "type": { "kind": "instance_of", "type": { "name": "Id", "namespace": "_types" } } } ], "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" }, { "body": { @@ -28245,9 +27891,9 @@ } } }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_anthropic" }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" + "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28271,26 +27917,26 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.put_azureaistudio" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `googlevertexai` service.", + "description": "Settings used to install the inference model. These settings are specific to the `azureaistudio` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" } } }, { "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", "name": "task_settings", "required": false, "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" } } } ] }, - "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the 
response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutGoogleVertexAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", "summary": "A text embedding task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" }, - "PutGoogleVertexAiRequestExample2": { - "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", - "summary": "A rerank task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" } }, "inherits": { @@ -28330,7 +27976,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.put_azureaistudio" }, "path": [ { @@ -28340,14 +27986,14 @@ "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "googlevertexai_inference_id", + "name": "azureaistudio_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28359,7 +28005,7 @@ } ], "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" }, { "body": { @@ -28375,9 +28021,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.put_azureaistudio" }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28387,8 +28033,6 @@ "kind": "properties", "properties": [ { ->>>>>>> 33420c6e0 (Add ELSER inference API details (#4026)) ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", @@ -28403,26 +28047,26 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `cohere`.", + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_cohere" + "namespace": "inference.put_azureopenai" } } }, { - "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" } } }, @@ -28433,24 +28077,24 @@ "type": { "kind": "instance_of", "type": { - "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" } } } ] }, - "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutCohereRequestExample1": { - "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference 
endpoint that performs a text embedding task.", + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", "summary": "A text embedding task", - "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" }, - "PutCohereRequestExample2": { - "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", - "summary": "A rerank task", - "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" } }, "inherits": { @@ -28462,24 +28106,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_cohere" + "namespace": "inference.put_azureopenai" }, "path": [ { - "description": "The type of the inference task that the model will perform.", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CohereTaskType", - "namespace": "inference.put_cohere" + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "cohere_inference_id", + "name": "azureopenai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28491,7 +28135,7 @@ } ], "query": [], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" }, { "body": { @@ -28507,9 +28151,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_cohere" + "namespace": "inference.put_azureopenai" }, - "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28519,32 +28163,70 @@ "kind": "properties", "properties": [ { - "description": "The type of service supported for the specified task type. 
In this case, `elastic`.", + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `cohere`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_eis" + "namespace": "inference.put_cohere" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" } } } ] }, - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutCohereRequestExample1": { + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, "inherits": { "type": { "name": "RequestBase", @@ -28554,24 +28236,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_eis" + "namespace": "inference.put_cohere" }, "path": [ { - "description": "The type of the inference task that the model will perform.\nNOTE: The 
`chat_completion` task type only supports streaming and only through the _stream API.", + "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "EisTaskType", - "namespace": "inference.put_eis" + "name": "CohereTaskType", + "namespace": "inference.put_cohere" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "eis_inference_id", + "name": "cohere_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28583,7 +28265,7 @@ } ], "query": [], - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" }, { "body": { @@ -28599,9 +28281,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_eis" + "namespace": "inference.put_cohere" }, - "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28625,46 +28307,74 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `elser`.", + "description": "The type of service supported for the specified task type. In this case, `elasticsearch`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_elser" + "namespace": "inference.put_elasticsearch" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", + "description": "Settings used to install the inference model. These settings are specific to the `elasticsearch` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "name": "ElasticsearchServiceSettings", + "namespace": "inference.put_elasticsearch" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ElasticsearchTaskSettings", + "namespace": "inference.put_elasticsearch" } } } ] }, - "deprecation": { - "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", - "version": "8.16.0" - }, - "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints; you only need to create the endpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
 "examples": {
- "PutElserRequestExample1": {
- "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.",
- "summary": "A sparse embedding task",
- "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}"
+ "PutElasticsearchRequestExample1": {
+ "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.",
+ "summary": "ELSER sparse embedding task",
+ "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n },\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\" \n }\n}"
 },
- "PutElserRequestExample2": {
- "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. 
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", - "summary": "Adaptive allocations", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + "PutElasticsearchRequestExample2": { + "description": "Run `PUT _inference/rerank/my-elastic-rerank` to create an inference endpoint that performs a rerank task using the built-in Elastic Rerank cross-encoder model. The `model_id` must be `.rerank-v1`, which is the ID of the built-in Elastic Rerank model. The API will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. Once deployed, the model can be used for semantic re-ranking with a `text_similarity_reranker` retriever.", + "summary": "Elastic rerank task", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"model_id\": \".rerank-v1\", \n \"num_threads\": 1,\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n }\n }\n}" + }, + "PutElasticsearchRequestExample3": { + "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task. The `model_id` must be the ID of one of the built-in E5 models. The API will automatically download the E5 model if it isn't already downloaded and then deploy the model.", + "summary": "E5 text embedding task", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\" \n }\n}" + }, + "PutElasticsearchRequestExample4": { + "description": "Run `PUT _inference/text_embedding/my-msmarco-minilm-model` to create an inference endpoint that performs a `text_embedding` task with a model that was uploaded by Eland.", + "summary": "Eland text embedding task", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \"msmarco-MiniLM-L12-cos-v5\" \n }\n}" + }, + "PutElasticsearchRequestExample5": { + "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task and to configure adaptive allocations. 
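[Note: the descriptions in this section repeatedly direct users to the get trained model statistics API to verify that a deployment has finished. A minimal Console sketch of that check, assuming the `_ml/trained_models/<model_id>/_stats` path and reusing the `.multilingual-e5-small` model ID from these examples:

    GET _ml/trained_models/.multilingual-e5-small/_stats

In the response, look for `"state": "fully_allocated"` and confirm that `"allocation_count"` matches `"target_allocation_count"` before sending inference requests to the endpoint.]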
The API request will automatically download the E5 model if it isn't already downloaded and then deploy the model.",
+ "summary": "Adaptive allocation",
+ "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\"\n }\n}"
+ },
+ "PutElasticsearchRequestExample6": {
+ "description": "Run `PUT _inference/sparse_embedding/use_existing_deployment` to use an already existing model deployment when creating an inference endpoint.",
+ "summary": "Existing model deployment",
+ "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"deployment_id\": \".elser_model_2\"\n }\n}"
 }
 },
 "inherits": {
@@ -28676,7 +28386,7 @@
 "kind": "request",
 "name": {
 "name": "Request",
- "namespace": "inference.put_elser"
+ "namespace": "inference.put_elasticsearch"
 },
 "path": [
 {
@@ -28686,14 +28396,14 @@
 "type": {
 "kind": "instance_of",
 "type": {
- "name": "ElserTaskType",
- "namespace": "inference.put_elser"
+ "name": "ElasticsearchTaskType",
+ "namespace": "inference.put_elasticsearch"
 }
 }
 },
 {
- "description": "The unique identifier of the inference endpoint.",
- "name": "elser_inference_id",
+ "description": "The unique identifier of the inference endpoint.\nIt must not match the `model_id`.",
+ "name": "elasticsearch_inference_id",
 "required": true,
 "type": {
 "kind": "instance_of",
@@ -28705,7 +28415,7 @@
 }
 ],
 "query": [],
- "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82"
+ "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L25-L86"
 },
 {
 "body": {
@@ -28719,17 +28429,17 @@
 }
 },
 "examples": {
- "PutElserResponseExample1": {
- "description": "A successful response when creating an ELSER inference endpoint.",
- "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}"
+ "PutElasticsearchResponseExample1": {
+ "description": "A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n",
+ "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}"
 }
 },
 "kind": "response",
 "name": {
 "name": "Response",
- "namespace": "inference.put_elser"
+ "namespace": "inference.put_elasticsearch"
 },
- "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24"
+ "specLocation": "inference/put_elasticsearch/PutElasticsearchResponse.ts#L22-L24"
 },
 {
 "attachedBehaviors": [
@@ -28753,37 +28463,46 @@
 }
 },
 {
- "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.",
+ "description": "The type of service supported for the specified task type. 
In this case, `elser`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_elser" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `googleaistudio` service.", + "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "name": "ElserServiceSettings", + "namespace": "inference.put_elser" } } } ] }, - "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "deprecation": { + "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", + "version": "8.16.0" + }, + "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutGoogleAiStudioRequestExample1": { - "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + "PutElserRequestExample1": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. 
The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "summary": "Adaptive allocations", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" } }, "inherits": { @@ -28795,7 +28514,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_elser" }, "path": [ { @@ -28805,14 +28524,14 @@ "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" + "name": "ElserTaskType", + "namespace": "inference.put_elser" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "googleaistudio_inference_id", + "name": "elser_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28824,7 +28543,7 @@ } ], "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" }, { "body": { @@ -28837,12 +28556,18 @@ } } }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.put_elser" }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" + "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28866,54 +28591,37 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", + "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.put_googleaistudio" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `googlevertexai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `googleaistudio` service.",
 "name": "service_settings",
 "required": true,
 "type": {
 "kind": "instance_of",
 "type": {
- "name": "GoogleVertexAIServiceSettings",
- "namespace": "inference.put_googlevertexai"
+ "name": "GoogleAiStudioServiceSettings",
+ "namespace": "inference.put_googleaistudio"
 }
 }
 }
 ]
 },
- "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
 "examples": {
- "PutGoogleVertexAiRequestExample1": {
- "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.",
- "summary": "A text embedding task",
- "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}"
- },
- "PutGoogleVertexAiRequestExample2": {
- "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.",
- "summary": "A rerank task",
- "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}"
+ "PutGoogleAiStudioRequestExample1": {
+ "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.",
+ "summary": "A completion task",
+ "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}"
 }
 },
 "inherits": {
@@ -28925,7 +28633,7 @@
 "kind": "request",
 "name": {
 "name": "Request",
- "namespace": "inference.put_googlevertexai"
+ "namespace": "inference.put_googleaistudio"
 },
 "path": [
 {
@@ 
-28935,14 +28643,14 @@ "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" + "name": "GoogleAiStudioTaskType", + "namespace": "inference.put_googleaistudio" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "googlevertexai_inference_id", + "name": "googleaistudio_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28954,7 +28662,7 @@ } ], "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" }, { "body": { @@ -28970,9 +28678,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.put_googleaistudio" }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28982,7 +28690,6 @@ "kind": "properties", "properties": [ { ->>>>>>> 2dc985a1e (Add Cohere inference API details (#4025)) "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", @@ -28997,46 +28704,26 @@ } }, { -<<<<<<< HEAD -<<<<<<< HEAD - "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", -======= -<<<<<<< HEAD -======= - "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "namespace": "inference.put_googlevertexai" } } }, { -<<<<<<< HEAD - "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", -======= - "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "description": "Settings used to install the inference model. 
These settings are specific to the `googlevertexai` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" -======= - "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference.put_googlevertexai" } } }, @@ -29047,52 +28734,24 @@ "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" -======= - "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "GoogleVertexAITaskSettings", + "namespace": "inference.put_googlevertexai" } } } ] }, -<<<<<<< HEAD - "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAlibabaCloudRequestExample1": { - "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", - "summary": "A completion task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample2": { - "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", - "summary": "A rerank task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample3": { - "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample4": { - "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n 
\"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" -======= - "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAmazonBedrockRequestExample1": { - "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "PutGoogleVertexAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", "summary": "A text embedding task", - "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" }, - "PutAmazonBedrockRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "PutGoogleVertexAiRequestExample2": { + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "summary": "A rerank task", 
+ "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" } }, "inherits": { @@ -29104,7 +28763,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_googlevertexai" }, "path": [ { @@ -29114,14 +28773,14 @@ "type": { "kind": "instance_of", "type": { - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "name": "GoogleVertexAITaskType", + "namespace": "inference.put_googlevertexai" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "amazonbedrock_inference_id", + "name": "googlevertexai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -29133,7 +28792,7 @@ } ], "query": [], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" }, { "body": { @@ -29149,9 +28808,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_googlevertexai" }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -29175,51 +28834,37 @@ } }, { -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_hugging_face" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" } } } ] }, - "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAnthropicRequestExample1": { - "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", - "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" } }, "inherits": { @@ -29231,24 +28876,24 @@ "kind": "request", "name": { "name": "Request", - 
"namespace": "inference.put_anthropic" + "namespace": "inference.put_hugging_face" }, "path": [ { - "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "anthropic_inference_id", + "name": "huggingface_inference_id", "required": true, "type": { "kind": "instance_of", @@ -29260,7 +28905,7 @@ } ], "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" }, { "body": { @@ -29276,9 +28921,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_hugging_face" }, - "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -29302,26 +28947,26 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `cohere`.", + "description": "The type of service supported for the specified task type. In this case, `jinaai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_cohere" + "namespace": "inference.put_jinaai" } } }, { - "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `jinaai` service.",
 "name": "service_settings",
 "required": true,
 "type": {
 "kind": "instance_of",
 "type": {
- "name": "CohereServiceSettings",
- "namespace": "inference.put_cohere"
+ "name": "JinaAIServiceSettings",
+ "namespace": "inference.put_jinaai"
 }
 }
 },
@@ -29332,28 +28977,24 @@
 "type": {
 "kind": "instance_of",
 "type": {
- "name": "CohereTaskSettings",
- "namespace": "inference.put_cohere"
+ "name": "JinaAITaskSettings",
+ "namespace": "inference.put_jinaai"
 }
 }
 }
 ]
 },
- "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "description": "Create a JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
 "examples": {
- "PutCohereRequestExample1": {
- "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.",
+ "PutJinaAiRequestExample1": {
+ "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.",
 "summary": "A text embedding task",
- "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}"
+ "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}"
 },
- "PutCohereRequestExample2": {
- "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.",
+ "PutJinaAiRequestExample2": {
+ "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.",
 "summary": "A rerank task",
- "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}"
-<<<<<<< HEAD
->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022))
-=======
->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023))
+ "value": "{\n 
\"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" } }, "inherits": { @@ -29365,15 +29006,7 @@ "kind": "request", "name": { "name": "Request", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= - "namespace": "inference.put_cohere" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_cohere" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) + "namespace": "inference.put_jinaai" }, "path": [ { @@ -29383,32 +29016,14 @@ "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" -======= - "name": "CohereTaskType", - "namespace": "inference.put_cohere" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "CohereTaskType", - "namespace": "inference.put_cohere" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" } } }, { "description": "The unique identifier of the inference endpoint.", -<<<<<<< HEAD -<<<<<<< HEAD - "name": "alibabacloud_inference_id", -======= - "name": "cohere_inference_id", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "cohere_inference_id", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) + "name": "jinaai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -29420,15 +29035,7 @@ } ], "query": [], -<<<<<<< HEAD -<<<<<<< HEAD - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" -======= - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" }, { "body": { @@ -29444,1150 +29051,9 @@ "kind": "response", "name": { "name": "Response", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" -======= - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_cohere" + "namespace": "inference.put_jinaai" }, - "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { -<<<<<<< HEAD -<<<<<<< HEAD - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", -======= - "description": "The type of service supported for the specified task type. 
In this case, `elastic`.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "The type of service supported for the specified task type. In this case, `elastic`.", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureaistudio" -======= - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - } - } - }, - { -<<<<<<< HEAD -<<<<<<< HEAD - "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", -======= - "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" -======= - "name": "EisServiceSettings", - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "EisServiceSettings", - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - } - } - } - ] - }, -<<<<<<< HEAD -<<<<<<< HEAD - "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAzureAiStudioRequestExample1": { - "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" - }, - "PutAzureAiStudioRequestExample2": { - "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", - "summary": "A completion task", - "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" - } - }, -======= - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureaistudio" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "azureaistudio_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_azureaistudio" - }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" - } - } - }, - { - "description": "Settings used to install the inference model. 
These settings are specific to the `azureopenai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" - } - } - } - ] - }, - "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAzureOpenAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" - }, - "PutAzureOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", - "summary": "A completion task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_azureopenai" -======= - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" -======= - "name": "EisTaskType", - "namespace": "inference.put_eis" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "EisTaskType", - "namespace": "inference.put_eis" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", -<<<<<<< HEAD -<<<<<<< HEAD - "name": "azureopenai_inference_id", -======= - "name": "eis_inference_id", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "eis_inference_id", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], -<<<<<<< HEAD -<<<<<<< HEAD - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" -======= - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" -======= - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": 
"properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { -<<<<<<< HEAD -<<<<<<< HEAD -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "description": "The type of service supported for the specified task type. In this case, `elasticsearch`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "Settings used to install the inference model. These settings are specific to the `elasticsearch` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" - } - } - } - ] - }, - "description": "Create an Elasticsearch inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutElasticsearchRequestExample1": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. 
The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", - "summary": "ELSER sparse embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n },\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\" \n }\n}" - }, - "PutElasticsearchRequestExample2": { - "description": "Run `PUT _inference/rerank/my-elastic-rerank` to create an inference endpoint that performs a rerank task using the built-in Elastic Rerank cross-encoder model. The `model_id` must be `.rerank-v1`, which is the ID of the built-in Elastic Rerank model. The API will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. Once deployed, the model can be used for semantic re-ranking with a `text_similarity_reranker` retriever.", - "summary": "Elastic rerank task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"model_id\": \".rerank-v1\", \n \"num_threads\": 1,\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n }\n }\n}" - }, - "PutElasticsearchRequestExample3": { - "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task. The `model_id` must be the ID of one of the built-in E5 models. The API will automatically download the E5 model if it isn't already downloaded and then deploy the model.", - "summary": "E5 text embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\" \n }\n}" - }, - "PutElasticsearchRequestExample4": { - "description": "Run `PUT _inference/text_embedding/my-msmarco-minilm-model` to create an inference endpoint that performs a `text_embedding` task with a model that was uploaded by Eland.", - "summary": "Eland text embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \"msmarco-MiniLM-L12-cos-v5\" \n }\n}" - }, - "PutElasticsearchRequestExample5": { - "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task and to configure adaptive allocations. 
The API request will automatically download the E5 model if it isn't already downloaded and then deploy the model.", - "summary": "Adaptive allocation", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\"\n }\n}" - }, - "PutElasticsearchRequestExample6": { - "description": "Run `PUT _inference/sparse_embedding/use_existing_deployment` to use an already existing model deployment when creating an inference endpoint.", - "summary": "Existing model deployment", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"deployment_id\": \".elser_model_2\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_elasticsearch" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.\nIt must not match the `model_id`.", - "name": "elasticsearch_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L25-L86" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "examples": { - "PutElasticsearchResponseExample1": { - "description": "A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n", - "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `elser`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_elser" - } - } - }, - { - "description": "Settings used to install the inference model.
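The "wait for `fully_allocated`" step that these descriptions repeat can be checked with a single call to the get trained model statistics API. A minimal sketch, assuming the built-in `.elser_model_2` model from the examples above; any deployed model ID works the same way:

GET _ml/trained_models/.elser_model_2/_stats

In the response, the `deployment_stats.allocation_status` object carries the `state`, `allocation_count`, and `target_allocation_count` fields to compare before sending inference requests.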
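The `my-elastic-rerank` example above notes that, once deployed, the model can back a `text_similarity_reranker` retriever. A hedged sketch of such a search, assuming an index with a `text` field; the index name and query text are illustrative only, not values from this schema:

POST my-index/_search
{
  "retriever": {
    "text_similarity_reranker": {
      "retriever": {
        "standard": {
          "query": { "match": { "text": "how do adaptive allocations scale" } }
        }
      },
      "field": "text",
      "inference_id": "my-elastic-rerank",
      "inference_text": "how do adaptive allocations scale"
    }
  }
}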
These settings are specific to the `elser` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" - } - } - } - ] - }, - "deprecation": { - "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", - "version": "8.16.0" - }, - "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create an endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutElserRequestExample1": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" - }, - "PutElserRequestExample2": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations.
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", - "summary": "Adaptive allocations", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_elser" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "elser_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "examples": { - "PutElserResponseExample1": { - "description": "A successful response when creating an ELSER inference endpoint.", - "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_elser" - }, - "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" - } - } - }, - { - "description": "Settings used to install the inference model. 
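Once the ELSER endpoint above reports `fully_allocated`, it is exercised through the generic inference API. A minimal sketch, reusing the `my-elser-model` endpoint ID from the ELSER examples; the input text is illustrative:

POST _inference/sparse_embedding/my-elser-model
{
  "input": "These are not the droids you are looking for."
}

The response contains one map of token-to-weight pairs per input string.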
These settings are specific to the `googleaistudio` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" - } - } - } - ] - }, - "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutGoogleAiStudioRequestExample1": { - "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_googleaistudio" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "googleaistudio_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "Settings used to install the inference model.
These settings are specific to the `googlevertexai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" - } - } - } - ] - }, - "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutGoogleVertexAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample2": { - "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", - "summary": "A rerank task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_googlevertexai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "googlevertexai_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - 
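As with the other services, the endpoint created by `PutGoogleAiStudioRequestExample1` is then called through the generic inference API. A minimal sketch, assuming that example's `google_ai_studio_completion` endpoint ID and an illustrative prompt:

POST _inference/completion/google_ai_studio_completion
{
  "input": "What is an inference endpoint?"
}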
"extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { -<<<<<<< HEAD ->>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" - } - } - }, - { - "description": "Settings used to install the inference model. These settings are specific to the `hugging_face` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" - } - } - } - ] - }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutHuggingFaceRequestExample1": { - "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_hugging_face" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": 
"huggingface_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_hugging_face" - }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `jinaai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "Settings used to install the inference model. These settings are specific to the `jinaai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" - } - } - } - ] - }, - "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutJinaAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" - }, - "PutJinaAiRequestExample2": { - "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", - "summary": "A rerank task", - "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": 
\"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_jinaai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "jinaai_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" + "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -30701,136 +29167,6 @@ }, "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L24" }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "Settings used to install the inference model. 
These settings are specific to the `googlevertexai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" - } - } - } - ] - }, - "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutGoogleVertexAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample2": { - "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", - "summary": "A rerank task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_googlevertexai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "googlevertexai_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" - }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -104167,11 +102503,6 @@ "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< 
HEAD "name": "completion" }, { @@ -104179,15 +102510,37 @@ }, { "name": "sparse_embedding" -======= -<<<<<<< HEAD -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" + }, + { + "kind": "enum", + "members": [ + { + "name": "alibabacloud-ai-search" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" + }, + { + "kind": "enum", + "members": [ + { "name": "completion" }, { -<<<<<<< HEAD "name": "text_embedding" } ], @@ -104215,10 +102568,6 @@ "members": [ { "name": "completion" -======= -======= - "name": "completion" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) } ], "name": { @@ -104247,10 +102596,64 @@ "name": "completion" }, { -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureaistudio" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" + }, + { + "kind": "enum", + "members": [ + { + "name": "azureopenai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureopenai" + }, + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { "name": "rerank" }, { @@ -104258,7 +102661,6 @@ } ], "name": { -<<<<<<< HEAD "name": "CohereTaskType", "namespace": "inference.put_cohere" }, @@ -104360,94 +102762,30 @@ "kind": "enum", "members": [ { -======= -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) - "name": "chat_completion" } ], - "name": { - "name": "EisTaskType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" - }, - { - "kind": "enum", - "members": [ - { - "name": "elastic" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" - }, - { - "kind": "enum", - "members": [ - { -<<<<<<< HEAD -<<<<<<< HEAD "name": "rerank" }, { "name": "sparse_embedding" -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) }, { "name": "text_embedding" } ], "name": { -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AlibabaCloudTaskType", - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "name": "ElasticsearchTaskType",
"namespace": "inference.put_elasticsearch" }, "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L88-L92" -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) }, { "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD - "name": "alibabacloud-ai-search" -======= - "name": "elasticsearch" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "name": "elasticsearch" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) } ], "name": { "name": "ServiceType", -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" - }, - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "namespace": "inference.put_elasticsearch" }, "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L94-L96" @@ -104456,10 +102794,6 @@ "kind": "enum", "members": [ { -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "name": "sparse_embedding" } ], @@ -104481,16 +102815,6 @@ "namespace": "inference.put_elser" }, "specLocation": "inference/put_elser/PutElserRequest.ts#L88-L90" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "kind": "enum", @@ -104503,229 +102827,57 @@ } ], "name": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" - }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "name": "GoogleAiStudioTaskType", "namespace": "inference.put_googleaistudio" }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "azureaistudio" -======= - "name": "googleaistudio" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "name": "googleaistudio" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "name": "googleaistudio" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= - "name": "googleaistudio" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) } ], "name": { "name": "ServiceType", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureaistudio" - }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" -======= - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" ->>>>>>> 72877ef81 
(Add Amazon Bedrock inference API (#4022)) -======= - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= "namespace": "inference.put_googleaistudio" }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "completion" -======= - "name": "rerank" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "rerank" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "name": "rerank" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= "name": "rerank" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "name": "text_embedding" } ], "name": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAITaskType", "namespace": "inference.put_googlevertexai" }, "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "azureopenai" -======= "name": "googlevertexai" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "name": "googlevertexai" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "name": "googlevertexai" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= - "name": "googlevertexai" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= - "name": "googlevertexai" ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) } ], "name": { "name": "ServiceType", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" -======= - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "namespace": "inference.put_googlevertexai" }, "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - 
"namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "kind": "enum", "members": [ { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 2dc985a1e (Add Cohere inference API details (#4025)) ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> 33420c6e0 (Add ELSER inference API details (#4026)) ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "name": "text_embedding" } ], @@ -104843,11 +102995,6 @@ "namespace": "inference.put_mistral" }, "specLocation": "inference/put_mistral/PutMistralRequest.ts#L83-L85" -======= - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "enum", @@ -125269,11 +123416,6 @@ { "kind": "interface", "name": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" }, @@ -125281,27 +123423,6 @@ { "description": "A valid API key for the AlibabaCloud AI Search API.", "name": "api_key", -======= -<<<<<<< HEAD -======= - "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" - }, - "properties": [ - { - "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", - "name": "access_key", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= -======= - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" - }, - "properties": [ - { - "description": "A valid API key for the Anthropic API.", - "name": "api_key", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "required": true, "type": { "kind": "instance_of", @@ -125312,23 +123433,10 @@ } }, { -<<<<<<< HEAD -<<<<<<< HEAD "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", "extDocId": "alibabacloud-api-keys", "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", "name": "host", -======= - "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "model", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", - "extDocId": "anothropic-models", - "name": "model_id", ->>>>>>> 76ab18016 (Add Anthropic inference API 
details (#4023)) "required": true, "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } } }, { -<<<<<<< HEAD -<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", -======= - "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", - "name": "provider", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "region", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.\nBy default, the `amazonbedrock` service sets the number of requests allowed per minute to 120.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "name": "rate_limit", "required": false, "type": { @@ -125383,10 +123457,8 @@ "namespace": "inference._types" } } -<<<<<<< HEAD }, { -<<<<<<< HEAD "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", "name": "service_id", "required": true, @@ -125401,12 +123473,6 @@ { "description": "The name of the workspace used for the inference task.", "name": "workspace", -======= - "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.", - "extDocId": "amazonbedrock-secret-keys", - "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", - "name": "secret_key", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "required": true, "type": { "kind": "instance_of", "type": { "name": "string", "namespace": "_builtins" } } } ], -<<<<<<< HEAD "specLocation":
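Assembled from the AlibabaCloud AI Search service settings above (`api_key`, `host`, `service_id`, `workspace`), a creation request looks roughly as follows. This is a sketch: the key, host, and workspace values are placeholders, not values taken from this schema:

PUT _inference/completion/alibabacloud_ai_search_completion
{
  "service": "alibabacloud-ai-search",
  "service_settings": {
    "api_key": "AlibabaCloud-API-Key",
    "service_id": "ops-qwen-turbo",
    "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com",
    "workspace": "default"
  }
}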
"inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" -======= - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - } - ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) }, { "kind": "interface", @@ -125453,8 +123510,6 @@ { "kind": "interface", "name": { -<<<<<<< HEAD -<<<<<<< HEAD "name": "AlibabaCloudTaskSettings", "namespace": "inference.put_alibabacloud" }, @@ -125489,15 +123544,13 @@ { "kind": "interface", "name": { - "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" }, "properties": [ { - "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "azureaistudio-api-keys", - "extDocUrl": "https://ai.azure.com/", - "name": "api_key", + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", "required": true, "type": { "kind": "instance_of", @@ -125508,10 +123561,10 @@ } }, { - "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", - "extDocId": "azureaistudio-endpoint-types", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", - "name": "endpoint_type", + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", "required": true, "type": { "kind": "instance_of", @@ -125522,9 +123575,9 @@ } }, { - "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", - "name": "target", - "required": true, + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task 
type only", + "name": "provider", + "required": false, "type": { "kind": "instance_of", "type": { @@ -125534,8 +123587,10 @@ } }, { - "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", - "name": "provider", + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", "required": true, "type": { "kind": "instance_of", @@ -125546,7 +123601,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", "name": "rate_limit", "required": false, "type": { @@ -125556,39 +123611,33 @@ "namespace": "inference._types" } } - } - ], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" - }, - { - "kind": "interface", - "name": { - "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" - }, - "properties": [ + }, { - "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", - "name": "do_sample", - "required": false, + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } - }, - { - "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", -======= + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" + }, + { + "kind": "interface", + "name": { "name": "AmazonBedrockTaskSettings", "namespace": "inference.put_amazonbedrock" }, "properties": [ { "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "name": "max_new_tokens", "required": false, "serverDefault": 64, @@ -125601,11 +123650,7 @@ } }, { -<<<<<<< HEAD - "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", -======= "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls 
the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "name": "temperature", "required": false, "type": { @@ -125617,13 +123662,8 @@ } }, { -<<<<<<< HEAD - "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", - "name": "top_p", -======= "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", "name": "top_k", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "required": false, "type": { "kind": "instance_of", @@ -125634,60 +123674,23 @@ } }, { -<<<<<<< HEAD - "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", - "name": "user", -======= "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", "name": "top_p", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "required": false, "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "string", - "namespace": "_builtins" -======= "name": "float", "namespace": "_types" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) } } } ], -<<<<<<< HEAD - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" -======= "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) }, { "kind": "interface", "name": { -<<<<<<< HEAD - "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" - }, - "properties": [ - { - "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "api_key", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", - "name": "api_version", -======= "name": "AnthropicServiceSettings", "namespace": "inference.put_anthropic" }, @@ -125695,7 +123698,6 @@ { "description": "A valid 
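Combining the Amazon Bedrock service settings and task settings restored above gives a `completion` creation request of roughly this shape. The credentials, region, provider, and model ID are placeholders chosen for illustration, not values from this schema:

PUT _inference/completion/amazon_bedrock_completion
{
  "service": "amazonbedrock",
  "service_settings": {
    "access_key": "AWS-access-key",
    "secret_key": "AWS-secret-key",
    "region": "us-east-1",
    "provider": "amazontitan",
    "model": "amazon.titan-text-premier-v1:0"
  },
  "task_settings": {
    "max_new_tokens": 256,
    "temperature": 0.2
  }
}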
API key for the Anthropic API.", "name": "api_key", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "required": true, "type": { "kind": "instance_of", @@ -125706,16 +123708,9 @@ } }, { -<<<<<<< HEAD - "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found though the Azure OpenAI Studio portal that is linked to your subscription.", - "extDocId": "azureopenai", - "extDocUrl": "https://oai.azure.com/", - "name": "deployment_id", -======= "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", "extDocId": "anothropic-models", "name": "model_id", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) "required": true, "type": { "kind": "instance_of", @@ -125726,12 +123721,6 @@ } }, { -<<<<<<< HEAD - "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "entra_id", -======= "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", "name": "rate_limit", "required": false, @@ -125749,8 +123738,6 @@ { "kind": "interface", "name": { -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) "name": "AnthropicTaskSettings", "namespace": "inference.put_anthropic" }, @@ -125811,17 +123798,14 @@ { "kind": "interface", "name": { -======= -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) - "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" }, "properties": [ { - "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "cohere-api-keys", - "extDocUrl": "https://dashboard.cohere.com/api-keys", + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", "name": "api_key", "required": true, "type": { @@ -125833,28 +123817,181 @@ } }, { - "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse 
`int8` for signed int8 embeddings.", - "name": "embedding_type", + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", "required": false, - "serverDefault": "float", "type": { "kind": "instance_of", "type": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" } } }, { - "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", - "name": "model_id", -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= 
->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025))
+      "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.",
+      "name": "max_new_tokens",
+      "required": false,
+      "serverDefault": 64,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "integer",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.",
+      "name": "temperature",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.",
+      "name": "top_p",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.",
+      "name": "user",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "AzureOpenAIServiceSettings",
+    "namespace": "inference.put_azureopenai"
+  },
+  "properties": [
+    {
+      "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+      "extDocId": "azureopenai-auth",
+      "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication",
+      "name": "api_key",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    },
+    {
+      "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.",
+      "name": "api_version",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    },
+    {
+      "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.",
+      "extDocId": "azureopenai",
+      "extDocUrl": "https://oai.azure.com/",
+      "name": "deployment_id",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    },
+    {
+      "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.",
+      "extDocId": "azureopenai-auth",
+      "extDocUrl": 
"https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", + "name": "entra_id", "required": false, "type": { "kind": "instance_of", @@ -125865,21 +124002,9 @@ } }, { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", "extDocId": "azureopenai-quota-limits", "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", -======= - "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= - "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "rate_limit", "required": false, "type": { @@ -125891,157 +124016,178 @@ } }, { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", "extDocId": "azureopenai-portal", "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", "name": "resource_name", -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) - "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", - "name": "similarity", - "required": false, + "required": true, "type": { "kind": "instance_of", "type": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" }, { "kind": "interface", "name": { -<<<<<<< HEAD -======= - "name": "RateLimitSetting", - "namespace": "inference._types" + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" }, "properties": [ { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", + "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", "required": false, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Services.ts#L95-L100" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" }, { "kind": "interface", "name": { ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) - "name": "CohereTaskSettings", + "name": 
"CohereServiceSettings", "namespace": "inference.put_cohere" }, "properties": [ { - "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", - "name": "input_type", + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "cohere-api-keys", + "extDocUrl": "https://dashboard.cohere.com/api-keys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", + "name": "embedding_type", "required": false, + "serverDefault": "float", "type": { "kind": "instance_of", "type": { - "name": "InputType", + "name": "EmbeddingType", "namespace": "inference.put_cohere" } } }, { - "description": "For a `rerank` task, return doc text within the results.", - "name": "return_documents", + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "name": "model_id", "required": false, "type": { "kind": "instance_of", "type": { - "name": "boolean", + "name": "string", "namespace": "_builtins" } } }, { - "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", - "name": "top_n", + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", + "name": "rate_limit", "required": false, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "RateLimitSetting", + "namespace": "inference._types" } } }, { - "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values 
are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", - "name": "truncate", + "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", + "name": "similarity", "required": false, "type": { "kind": "instance_of", "type": { - "name": "TruncateType", + "name": "SimilarityType", "namespace": "inference.put_cohere" } } } ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" }, { "kind": "interface", "name": { -======= -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) - "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" }, "properties": [ { - "description": "The name of the model to use for the inference task.", - "name": "model_id", - "required": true, + "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", + "name": "input_type", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "string", + "name": "InputType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `rerank` task, return doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", "namespace": "_builtins" } } }, { - "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", - "name": "rate_limit", + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", "required": false, "type": { "kind": "instance_of", "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", + "name": "truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TruncateType", + "namespace": "inference.put_cohere" } } } ], - 
"specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" }, { "kind": "interface", "name": { -<<<<<<< HEAD -<<<<<<< HEAD "name": "ElasticsearchServiceSettings", "namespace": "inference.put_elasticsearch" }, @@ -126146,15 +124292,6 @@ { "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", "name": "min_number_of_allocations", -======= - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "required": false, "type": { "kind": "instance_of", @@ -126165,16 +124302,11 @@ } } ], -<<<<<<< HEAD "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L98-L115" -======= - "specLocation": "inference/_types/Services.ts#L95-L100" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) }, { "kind": "interface", "name": { -<<<<<<< HEAD "name": "ElasticsearchTaskSettings", "namespace": "inference.put_elasticsearch" }, @@ -126198,10 +124330,6 @@ { "kind": "interface", "name": { -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) "name": "ElserServiceSettings", "namespace": "inference.put_elser" }, @@ -126343,8 +124471,6 @@ { "kind": "interface", "name": { -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAIServiceSettings", "namespace": "inference.put_googlevertexai" }, @@ -126404,19 +124530,6 @@ { "description": "A valid service account in JSON format for the Google Vertex AI API.", "name": "service_account_json", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "required": true, "type": { "kind": "instance_of", @@ -126427,74 +124540,11 @@ } } ], -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" -======= - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": "interface", "name": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "AzureOpenAITaskSettings", - "namespace": "inference.put_azureopenai" - }, - "properties": [ - { - "description": "For a `completion` or 
`text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", - "name": "user", -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, - { - "kind": "interface", - "name": { ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "GoogleVertexAITaskSettings", "namespace": "inference.put_googlevertexai" }, @@ -126502,44 +124552,10 @@ { "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", "name": "auto_truncate", -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "required": false, "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) "name": "boolean", "namespace": "_builtins" } @@ -126559,37 +124575,10 @@ } ], "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) - }, - { - "kind": "interface", - "name": { -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) -======= ->>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) -======= ->>>>>>> 2dc985a1e (Add Cohere inference API details (#4025)) ->>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) -======= ->>>>>>> 33420c6e0 (Add ELSER inference API details (#4026)) ->>>>>>> b82415b5e (Add ELSER inference API details (#4026)) + }, + { + "kind": "interface", + "name": { "name": "HuggingFaceServiceSettings", "namespace": "inference.put_hugging_face" }, @@ -126804,8 +124793,6 @@ } ], "specLocation": "inference/put_mistral/PutMistralRequest.ts#L87-L114" -======= ->>>>>>> fefad6ff9 (Add Google Vertex AI inference details (#4028)) }, { "kind": 
"interface",