From 9c74af35a9a9afeddb86d8028c48d95ad03f9020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Mar 2025 15:41:00 +0100 Subject: [PATCH 1/2] Adds VoyageAI PUT inference details. --- output/openapi/elasticsearch-openapi.json | 152 ++++++++ .../elasticsearch-serverless-openapi.json | 152 ++++++++ output/schema/schema-serverless.json | 328 ++++++++++++++++++ output/schema/schema.json | 328 ++++++++++++++++++ output/typescript/types.ts | 31 ++ specification/_doc_ids/table.csv | 3 + .../_json_spec/inference.put.voyageai.json | 35 ++ .../put_voyageai/PutVoyageAIRequest.ts | 145 ++++++++ .../put_voyageai/PutVoyageAIResponse.ts | 24 ++ .../request/PutVoyageAIRequestExample1.yaml | 12 + .../request/PutVoyageAIRequestExample2.yaml | 11 + 11 files changed, 1221 insertions(+) create mode 100644 specification/_json_spec/inference.put.voyageai.json create mode 100644 specification/inference/put_voyageai/PutVoyageAIRequest.ts create mode 100644 specification/inference/put_voyageai/PutVoyageAIResponse.ts create mode 100644 specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample1.yaml create mode 100644 specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index a6324ca4e1..bcf0d8755c 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17755,6 +17755,92 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{voyageai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a VoyageAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-voyageai", + "parameters": [ + { + 
"in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "voyageai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_voyageai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutVoyageAIRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/voyageai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 512 dimensions.", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"voyage-3-large\",\n \"dimensions\": 512\n }\n}" + }, + "PutVoyageAIRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/voyageai-rerank` to create an inference endpoint that performs a `rerank` task.", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"rerank-2\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{watsonx_inference_id}": { "put": { "tags": [ @@ -76953,6 +77039,72 @@ } } }, + "inference.put_voyageai:VoyageAITaskType": { + "type": "string", + "enum": [ + "text_embedding", + "rerank" + ] + }, + "inference.put_voyageai:ServiceType": { + "type": "string", + "enum": [ + "voyageai" + ] + }, + "inference.put_voyageai:VoyageAIServiceSettings": { + "type": "object", + "properties": { + "dimensions": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/embeddings" + }, + "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "type": "number" + }, + "model_id": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/reranker" + }, + "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "embedding_type": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/embeddings" + }, + "description": "The data 
type for the embeddings to be returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "type": "number" + } + }, + "required": [ + "model_id" + ] + }, + "inference.put_voyageai:VoyageAITaskSettings": { + "type": "object", + "properties": { + "input_type": { + "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation).\nOnly for the `text_embedding` task type.", + "type": "string" + }, + "return_documents": { + "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", + "type": "boolean" + }, + "top_k": { + "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", + "type": "number" + }, + "truncation": { + "description": "Whether to truncate the input texts to fit within the context length.", + "type": "boolean" + } + } + }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 3cbc638020..468067807e 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9582,6 +9582,92 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{voyageai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a VoyageAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint 
consumes significant resources.", + "operationId": "inference-put-voyageai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "voyageai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_voyageai:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAIServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_voyageai:VoyageAITaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutVoyageAIRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/voyageai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 512 dimensions.", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"voyage-3-large\",\n \"dimensions\": 512\n }\n}" + }, + "PutVoyageAIRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/voyageai-rerank` to create an inference endpoint that performs a `rerank` task.", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"rerank-2\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{watsonx_inference_id}": { "put": { "tags": [ @@ -48150,6 +48236,72 @@ } } }, + "inference.put_voyageai:VoyageAITaskType": { + "type": "string", + "enum": [ + "text_embedding", + "rerank" + ] + }, + "inference.put_voyageai:ServiceType": { + "type": "string", + "enum": [ + "voyageai" + ] + }, + "inference.put_voyageai:VoyageAIServiceSettings": { + "type": "object", + "properties": { + "dimensions": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/embeddings" + }, + "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "type": "number" + }, + "model_id": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/reranker" + }, + "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "embedding_type": { + "externalDocs": { + "url": "https://docs.voyageai.com/docs/embeddings" + }, + "description": "The data 
type for the embeddings to be returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "type": "number" + } + }, + "required": [ + "model_id" + ] + }, + "inference.put_voyageai:VoyageAITaskSettings": { + "type": "object", + "properties": { + "input_type": { + "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation).\nOnly for the `text_embedding` task type.", + "type": "string" + }, + "return_documents": { + "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", + "type": "boolean" + }, + "top_k": { + "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", + "type": "number" + }, + "truncation": { + "description": "Whether to truncate the input texts to fit within the context length.", + "type": "boolean" + } + } + }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 885af064be..2c2ce7e447 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4591,6 +4591,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a VoyageAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes 
significant resources.", + "docId": "inference-api-put-voyageai", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai", + "name": "inference.put_voyageai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_voyageai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_voyageai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{voyageai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -26945,6 +26990,136 @@ }, "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `voyageai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `voyageai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAIServiceSettings", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAITaskSettings", + "namespace": "inference.put_voyageai" + } + } + } + ] + }, + "description": "Create a VoyageAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutVoyageAIRequestExample1": { + "description": "Run `PUT _inference/text_embedding/voyageai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 512 dimensions.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"voyage-3-large\",\n \"dimensions\": 512\n }\n}" + }, + "PutVoyageAIRequestExample2": { + "description": "Run `PUT _inference/rerank/voyageai-rerank` to create an inference endpoint that performs a `rerank` task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"rerank-2\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_voyageai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAITaskType", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "voyageai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L29-L78" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_voyageai" + }, + "specLocation": "inference/put_voyageai/PutVoyageAIResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100049,6 +100224,35 @@ }, "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L90-L92" }, + { + "kind": "enum", + "members": [ + { + "name": "voyageai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_voyageai" + }, + 
"specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L85-L87" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "rerank" + } + ], + "name": { + "name": "VoyageAITaskType", + "namespace": "inference.put_voyageai" + }, + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L80-L83" + }, { "kind": "enum", "members": [ @@ -120465,6 +120669,130 @@ ], "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L138-L144" }, + { + "kind": "interface", + "name": { + "name": "VoyageAIServiceSettings", + "namespace": "inference.put_voyageai" + }, + "properties": [ + { + "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "dimensions", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", + "extDocId": "voyageai-rerank", + "extDocUrl": "https://docs.voyageai.com/docs/reranker", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from VoyageAI.\nThe `voyageai` service sets a default number of requests allowed per minute depending on the task type.\nFor both `text_embedding` and `rerank`, it is set to `2000`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The data type for the embeddings to be 
returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "embedding_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L89-L120" + }, + { + "kind": "interface", + "name": { + "name": "VoyageAITaskSettings", + "namespace": "inference.put_voyageai" + }, + "properties": [ + { + "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation).\nOnly for the `text_embedding` task type.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", + "name": "return_documents", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Whether to truncate the input texts to fit within the context length.", + "name": "truncation", + "required": false, + "serverDefault": true, + "type": { + "kind": "instance_of", + "type": { + "name": 
"boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L122-L146" + }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 32cfd0cb11..9cb7c5050d 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9303,6 +9303,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a VoyageAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-voyageai", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai", + "name": "inference.put_voyageai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_voyageai" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_voyageai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{voyageai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150240,6 +150285,289 @@ }, "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L90-L92" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": 
"chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `voyageai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `voyageai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAIServiceSettings", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAITaskSettings", + "namespace": "inference.put_voyageai" + } + } + } + ] + }, + "description": "Create a VoyageAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `voyageai` service.\n\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutVoyageAIRequestExample1": { + "description": "Run `PUT _inference/text_embedding/voyageai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 512 dimensions.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"voyage-3-large\",\n \"dimensions\": 512\n }\n}" + }, + "PutVoyageAIRequestExample2": { + "description": "Run `PUT _inference/rerank/voyageai-rerank` to create an inference endpoint that performs a `rerank` task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"voyageai\",\n \"service_settings\": {\n \"model_id\": \"rerank-2\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_voyageai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "VoyageAITaskType", + "namespace": "inference.put_voyageai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "voyageai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L29-L78" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_voyageai" + }, + "specLocation": "inference/put_voyageai/PutVoyageAIResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "voyageai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_voyageai" + }, + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L85-L87" + }, + { + "kind": "interface", + "name": { + "name": "VoyageAIServiceSettings", + "namespace": 
"inference.put_voyageai" + }, + "properties": [ + { + "description": "The number of dimensions for resulting output embeddings.\nThis setting maps to `output_dimension` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "dimensions", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the VoyageAI documentation for the list of available text embedding and rerank models.", + "extDocId": "voyageai-rerank", + "extDocUrl": "https://docs.voyageai.com/docs/reranker", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from VoyageAI.\nThe `voyageai` service sets a default number of requests allowed per minute depending on the task type.\nFor both `text_embedding` and `rerank`, it is set to `2000`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The data type for the embeddings to be returned.\nThis setting maps to `output_dtype` in the VoyageAI documentation.\nPermitted values: float, int8, bit.\n`int8` is a synonym of `byte` in the VoyageAI documentation.\n`bit` is a synonym of `binary` in the VoyageAI documentation.\nOnly for the `text_embedding` task type.", + "extDocId": "voyageai-embeddings", + "extDocUrl": "https://docs.voyageai.com/docs/embeddings", + "name": "embedding_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": 
"inference/put_voyageai/PutVoyageAIRequest.ts#L89-L120" + }, + { + "kind": "interface", + "name": { + "name": "VoyageAITaskSettings", + "namespace": "inference.put_voyageai" + }, + "properties": [ + { + "description": "Type of the input text.\nPermitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation).\nOnly for the `text_embedding` task type.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.", + "name": "return_documents", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Whether to truncate the input texts to fit within the context length.", + "name": "truncation", + "required": false, + "serverDefault": true, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L122-L146" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "rerank" + } + ], + "name": { + "name": "VoyageAITaskType", + "namespace": "inference.put_voyageai" + }, + "specLocation": "inference/put_voyageai/PutVoyageAIRequest.ts#L80-L83" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 7a173a0727..c3b79aaeda 100644 --- 
a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13258,6 +13258,37 @@ export type InferencePutOpenaiResponse = InferenceInferenceEndpointInfo export type InferencePutOpenaiServiceType = 'openai' +export interface InferencePutVoyageaiRequest extends RequestBase { + task_type: InferencePutVoyageaiVoyageAITaskType + voyageai_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutVoyageaiServiceType + service_settings: InferencePutVoyageaiVoyageAIServiceSettings + task_settings?: InferencePutVoyageaiVoyageAITaskSettings + } +} + +export type InferencePutVoyageaiResponse = InferenceInferenceEndpointInfo + +export type InferencePutVoyageaiServiceType = 'voyageai' + +export interface InferencePutVoyageaiVoyageAIServiceSettings { + dimensions?: integer + model_id: string + rate_limit?: InferenceRateLimitSetting + embedding_type?: float +} + +export interface InferencePutVoyageaiVoyageAITaskSettings { + input_type?: string + return_documents?: boolean + top_k?: integer + truncation?: boolean +} + +export type InferencePutVoyageaiVoyageAITaskType = 'text_embedding' | 'rerank' + export interface InferencePutWatsonxRequest extends RequestBase { task_type: InferencePutWatsonxWatsonxTaskType watsonx_inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 25d42971c6..69009d84b6 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -318,6 +318,7 @@ inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/op inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html 
+inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference inference-api-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference @@ -846,6 +847,8 @@ user-agent-processor,https://www.elastic.co/guide/en/elasticsearch/reference/cur user-profile,https://www.elastic.co/guide/en/elasticsearch/reference/current/user-profile.html verify-repository,https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshots-register-repository.html#snapshots-repository-verification voting-config-exclusions,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-post-voting-config-exclusions +voyageai-embeddings,https://docs.voyageai.com/docs/embeddings +voyageai-rerank,https://docs.voyageai.com/docs/reranker watcher-works,https://www.elastic.co/guide/en/elasticsearch/reference/current/how-watcher-works.html watcher-api-ack-watch,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-ack-watch watcher-api-activate-watch,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-watcher-activate-watch diff --git a/specification/_json_spec/inference.put.voyageai.json b/specification/_json_spec/inference.put.voyageai.json new file mode 100644 index 0000000000..8a587c75ab --- /dev/null +++ b/specification/_json_spec/inference.put.voyageai.json @@ -0,0 +1,35 @@ +{ + "inference.put_voyageai": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-apis.html", + "description": "Configure a VoyageAI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], +
"content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{voyageai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "voyageai_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_voyageai/PutVoyageAIRequest.ts b/specification/inference/put_voyageai/PutVoyageAIRequest.ts new file mode 100644 index 0000000000..8e4f0d4498 --- /dev/null +++ b/specification/inference/put_voyageai/PutVoyageAIRequest.ts @@ -0,0 +1,145 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create a VoyageAI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `voyageai` service. 
+ + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_voyageai + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-voyageai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{voyageai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: VoyageAITaskType + /** + * The unique identifier of the inference endpoint. + */ + voyageai_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `voyageai`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `voyageai` service. + */ + service_settings: VoyageAIServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: VoyageAITaskSettings + } +} + +export enum VoyageAITaskType { + text_embedding, + rerank +} + +export enum ServiceType { + voyageai +} + +export class VoyageAIServiceSettings { + /** + * The number of dimensions for resulting output embeddings. + * This setting maps to `output_dimension` in the VoyageAI documentation. + * Only for the `text_embedding` task type. + * @ext_doc_id voyageai-embeddings + */ + dimensions?: integer + /** + * The name of the model to use for the inference task. + * Refer to the VoyageAI documentation for the list of available text embedding and rerank models. 
+ * @ext_doc_id voyageai-embeddings + * @ext_doc_id voyageai-rerank + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from VoyageAI. + * The `voyageai` service sets a default number of requests allowed per minute depending on the task type. + * For both `text_embedding` and `rerank`, it is set to `2000`. + */ + rate_limit?: RateLimitSetting + /** + * The data type for the embeddings to be returned. + * This setting maps to `output_dtype` in the VoyageAI documentation. + * Permitted values: float, int8, bit. + * `int8` is a synonym of `byte` in the VoyageAI documentation. + * `bit` is a synonym of `binary` in the VoyageAI documentation. + * Only for the `text_embedding` task type. + * @ext_doc_id voyageai-embeddings + */ + embedding_type?: float +} + +export class VoyageAITaskSettings { + /** + * Type of the input text. + * Permitted values: `ingest` (maps to `document` in the VoyageAI documentation), `search` (maps to `query` in the VoyageAI documentation). + * Only for the `text_embedding` task type. + */ + input_type?: string + /** + * Whether to return the source documents in the response. + * Only for the `rerank` task type. + * @server_default false + */ + return_documents?: boolean + /** + * The number of most relevant documents to return. + * If not specified, the reranking results of all documents will be returned. + * Only for the `rerank` task type. + */ + top_k?: integer + /** + * Whether to truncate the input texts to fit within the context length. + * @server_default true + */ + truncation?: boolean +} diff --git a/specification/inference/put_voyageai/PutVoyageAIResponse.ts b/specification/inference/put_voyageai/PutVoyageAIResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_voyageai/PutVoyageAIResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample1.yaml b/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample1.yaml new file mode 100644 index 0000000000..7a2b46fb3e --- /dev/null +++ b/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample1.yaml @@ -0,0 +1,12 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/voyageai-embeddings` to create an inference endpoint that performs a `text_embedding` task. The embeddings created by requests to this endpoint will have 512 dimensions. 
+# method_request: "PUT _inference/text_embedding/voyageai-embeddings" +# type: "request" +value: |- + { + "service": "voyageai", + "service_settings": { + "model_id": "voyage-3-large", + "dimensions": 512 + } + } diff --git a/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample2.yaml b/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample2.yaml new file mode 100644 index 0000000000..473dda3535 --- /dev/null +++ b/specification/inference/put_voyageai/examples/request/PutVoyageAIRequestExample2.yaml @@ -0,0 +1,11 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/voyageai-rerank` to create an inference endpoint that performs a `rerank` task. +# method_request: "PUT _inference/rerank/voyageai-rerank" +# type: "request" +value: |- + { + "service": "voyageai", + "service_settings": { + "model_id": "rerank-2" + } + } From aa3539bdb16673fcbd6627fab28b229011f48778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 18 Mar 2025 16:20:41 +0100 Subject: [PATCH 2/2] Update specification/inference/put_voyageai/PutVoyageAIRequest.ts --- specification/inference/put_voyageai/PutVoyageAIRequest.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/inference/put_voyageai/PutVoyageAIRequest.ts b/specification/inference/put_voyageai/PutVoyageAIRequest.ts index 8e4f0d4498..ef016b1ff7 100644 --- a/specification/inference/put_voyageai/PutVoyageAIRequest.ts +++ b/specification/inference/put_voyageai/PutVoyageAIRequest.ts @@ -32,7 +32,7 @@ import { float, integer } from '@_types/Numeric' * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @rest_spec_name inference.put_voyageai - * @availability stack since=8.12.0 stability=stable visibility=public + * @availability stack since=8.19.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public * @cluster_privileges manage_inference * @doc_id inference-api-put-voyageai