diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 9181821efc..a2dfdf494a 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4323,6 +4323,86 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform chat completion inference", + "docId": "inference-api-chat-completion", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", + "name": "inference.chat_completion_unified", + "request": { + "name": "Request", + "namespace": "inference.chat_completion_unified" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.chat_completion_unified" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{inference_id}/_stream" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform completion inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference", + "name": "inference.completion", + "request": { + "name": "Request", + "namespace": "inference.completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/completion/{inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -4427,26 +4507,26 @@ "visibility": "public" } }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-post", - "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference", - "name": "inference.inference", + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docId": "inference-api-put", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put", + "name": "inference.put", "privileges": { "cluster": [ - "monitor_inference" + "manage_inference" ] }, "request": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.put" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.put" }, "responseMediaType": [ "application/json" @@ -4454,13 +4534,13 @@ "urls": [ { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{inference_id}" }, { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{task_type}/{inference_id}" } @@ -4473,15 +4553,15 @@ "visibility": "public" }, "stack": { - "since": "8.11.0", + "since": "8.16.0", "stability": "stable", "visibility": "public" } }, - "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to 
use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-put", - "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put", - "name": "inference.put", + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-watsonx", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-watsonx-ai.html", + "name": "inference.put_watsonx", "privileges": { "cluster": [ "manage_inference" @@ -4489,15 +4569,15 @@ }, "request": { "name": "Request", - "namespace": "inference.put" + "namespace": "inference.put_watsonx" }, - "requestBodyRequired": true, + "requestBodyRequired": false, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.put" + "namespace": "inference.put_watsonx" }, "responseMediaType": [ "application/json" @@ -4507,13 +4587,52 @@ "methods": [ "PUT" ], - "path": "/_inference/{inference_id}" + "path": "/_inference/{task_type}/{watsonx_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform reranking inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference", + "name": "inference.rerank", + "privileges": { + "cluster": [ + "monitor_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.rerank" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.rerank" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{task_type}/{inference_id}" + "path": "/_inference/rerank/{inference_id}" } ] }, @@ -4524,17 +4643,18 @@ "visibility": "public" }, "stack": { - "since": "8.18.0", + "since": "8.11.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform inference on the service using the Unified Schema", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "name": "inference.unified_inference", + "description": "Perform sparse embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference", + "name": "inference.sparse_embedding", "request": { "name": "Request", - "namespace": 
"inference.unified_inference" + "namespace": "inference.sparse_embedding" }, "requestBodyRequired": false, "requestMediaType": [ @@ -4542,23 +4662,57 @@ ], "response": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.sparse_embedding" }, "responseMediaType": [ - "text/event-stream" + "application/json" ], "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_unified" + "path": "/_inference/sparse_embedding/{inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform text embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference", + "name": "inference.text_embedding", + "request": { + "name": "Request", + "namespace": "inference.text_embedding" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.text_embedding" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{task_type}/{inference_id}/_unified" + "path": "/_inference/text_embedding/{inference_id}" } ] }, @@ -23753,7 +23907,7 @@ } }, "kind": "dictionary_of", - "singleKey": false, + "singleKey": true, "value": { "kind": "instance_of", "type": { @@ -25356,9 +25510,116 @@ "CommonQueryParameters" ], "body": { - "kind": "no_body" + "kind": "properties", + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with 
temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ] }, - "description": "Delete an inference endpoint", + "description": "Perform chat completion inference", "inherits": { "type": { "name": "RequestBase", @@ -25368,23 +25629,11 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.delete" + "namespace": "inference.chat_completion_unified" }, "path": [ { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference identifier.", + "description": "The inference Id", "name": "inference_id", "required": true, "type": { @@ -25398,33 +25647,20 @@ ], "query": [ { - "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", - "name": "dry_run", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", - "name": "force", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", "required": false, - "serverDefault": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "boolean", - "namespace": "_builtins" + "name": "Duration", + "namespace": "_types" } } } ], - "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" }, { "body": { @@ -25432,92 +25668,17 @@ "value": { "kind": "instance_of", "type": { - "name": "DeleteInferenceEndpointResult", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.delete" - }, - "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Get an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.get" - }, - "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, - { - "description": "The inference Id", - "name": "inference_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } + "name": "StreamResult", + "namespace": "_types" } } - ], - "query": [], - "specLocation": "inference/get/GetRequest.ts#L24-L56" - }, - { - "body": { - "kind": "properties", - "properties": [ - { - "name": "endpoints", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - } - } - ] }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.get" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/get/GetResponse.ts#L22-L26" + "specLocation": 
"inference/chat_completion_unified/UnifiedResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -25527,19 +25688,7 @@ "kind": "properties", "properties": [ { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "name": "query", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "description": "Inference input.\nEither a string or an array of strings.", "name": "input", "required": true, "type": { @@ -25566,7 +25715,7 @@ } }, { - "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "description": "Optional task settings", "name": "task_settings", "required": false, "type": { @@ -25579,7 +25728,7 @@ } ] }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "description": "Perform completion inference on the service", "inherits": { "type": { "name": "RequestBase", @@ -25589,11 +25738,79 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.completion" }, "path": [ { - "description": "The type of inference task that the model performs.", + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/completion/CompletionRequest.ts#L25-L63" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.completion" + }, + "specLocation": "inference/completion/CompletionResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Delete an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.delete" + }, + "path": [ + { + "description": "The task type", "name": "task_type", "required": false, "type": { @@ -25605,7 +25822,7 @@ } }, { - "description": "The unique identifier for the inference endpoint.", + "description": "The inference identifier.", "name": "inference_id", "required": true, "type": { @@ -25619,20 +25836,33 @@ ], "query": [ { - "description": "The amount of time to wait for the inference request to complete.", - "name": "timeout", + "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", + "name": "dry_run", "required": false, - "serverDefault": "30s", + "serverDefault": false, "type": { "kind": "instance_of", "type": { - "name": "Duration", - "namespace": "_types" + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", + "name": "force", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" } } } ], - "specLocation": "inference/inference/InferenceRequest.ts#L26-L89" + "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" }, { "body": { @@ -25640,7 +25870,7 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceResult", + "name": "DeleteInferenceEndpointResult", "namespace": "inference._types" } } @@ -25648,9 +25878,84 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.delete" + }, + "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Get an 
inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.get" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/get/GetRequest.ts#L24-L56" + }, + { + "body": { + "kind": "properties", + "properties": [ + { + "name": "endpoints", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + } + } + ] + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.get" }, - "specLocation": "inference/inference/InferenceResponse.ts#L22-L24" + "specLocation": "inference/get/GetResponse.ts#L22-L26" }, { "attachedBehaviors": [ @@ -25734,113 +26039,260 @@ "kind": "properties", "properties": [ { - "description": "A list of objects representing the conversation.", - "name": "messages", + "description": "The type of service supported for the specified task type. In this case, `watsonxai`.", + "name": "service", "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.unified_inference" - } + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" } } }, { - "description": "The ID of the model to use.", - "name": "model", - "required": false, + "description": "Settings used to install the inference model. 
These settings are specific to the `watsonxai` service.", + "name": "service_settings", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" } } - }, + } + ] + }, + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_watsonx" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "watsonx_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, + "description": "Query input.", + "name": "query", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "long", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "name": "input", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": 
"array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } - } + ], + "kind": "union_of" } }, { - "description": "The sampling temperature to use.", - "name": "temperature", + "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "name": "task_settings", "required": false, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "TaskSettings", + "namespace": "inference._types" } } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, + } + ] + }, + "description": "Perform rereanking inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.rerank" + }, + "path": [ + { + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.unified_inference" - } + "name": "Id", + "namespace": "_types" } - }, + } + } + ], + "query": [ + { + "description": "The amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.rerank" + }, + "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.unified_inference" + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } - } + ], + "kind": "union_of" } }, { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", + "description": "Optional task settings", + "name": "task_settings", "required": false, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "TaskSettings", + "namespace": "inference._types" } } } ] }, - "description": "Perform inference on the service using the Unified Schema", + "description": "Perform sparse embedding inference on the service", "inherits": { "type": { "name": "RequestBase", @@ -25850,21 +26302,118 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.sparse_embedding" }, "path": [ { - 
"description": "The task type", - "name": "task_type", + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "TaskType", - "namespace": "inference._types" + "name": "Duration", + "namespace": "_types" } } - }, + } + ], + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, + "type": { + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "kind": "union_of" + } + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Perform text embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.text_embedding" + }, + "path": [ { "description": "The inference Id", "name": "inference_id", @@ -25893,7 +26442,7 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L27-L95" + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" }, { "body": { @@ -25901,17 +26450,17 @@ "value": { "kind": "instance_of", "type": { - "name": "StreamResult", - "namespace": "_types" + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" } } }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, - "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" + "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -88083,7 +88632,7 @@ "name": "IndexOptions", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/core.ts#L258-L263" + "specLocation": "_types/mapping/core.ts#L262-L267" }, { "kind": "enum", @@ -88115,7 +88664,7 @@ "name": "OnScriptError", "namespace": "_types.mapping" }, - "specLocation": "_types/mapping/core.ts#L137-L140" + "specLocation": "_types/mapping/core.ts#L141-L144" }, { "kind": "type_alias", @@ -89097,7 +89646,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L302-L333" + "specLocation": "_types/mapping/core.ts#L306-L337" }, { "kind": "enum", @@ -89154,7 +89703,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L265-L268" + "specLocation": 
"_types/mapping/core.ts#L269-L272" }, { "kind": "enum", @@ -89257,7 +89806,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L88-L92" + "specLocation": "_types/mapping/core.ts#L92-L96" }, { "inherits": { @@ -89434,7 +89983,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L94-L113" + "specLocation": "_types/mapping/core.ts#L98-L117" }, { "description": "A variant of text that trades scoring and efficiency of positional queries for space efficiency. This field\neffectively stores data the same way as a text field that only indexes documents (index_options: docs) and\ndisables norms (norms: false). Term queries perform as fast if not faster as on text fields, however queries\nthat need positions such as the match_phrase query perform slower as they need to look at the _source document\nto verify whether a phrase matches. All queries return constant scores that are equal to 1.0.", @@ -89515,7 +90064,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L231-L256" + "specLocation": "_types/mapping/core.ts#L235-L260" }, { "inherits": { @@ -89539,7 +90088,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L188-L190" + "specLocation": "_types/mapping/core.ts#L192-L194" }, { "inherits": { @@ -89574,7 +90123,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L192-L195" + "specLocation": "_types/mapping/core.ts#L196-L199" }, { "inherits": { @@ -89609,7 +90158,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L197-L200" + "specLocation": "_types/mapping/core.ts#L201-L204" }, { "inherits": { @@ -89744,7 +90293,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L212-L223" + "specLocation": "_types/mapping/core.ts#L216-L227" }, { "inherits": { @@ -89957,7 +90506,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L270-L287" + "specLocation": "_types/mapping/core.ts#L274-L291" }, { "kind": "interface", @@ -90024,7 +90573,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L289-L291" + "specLocation": "_types/mapping/core.ts#L293-L295" }, { "inherits": { @@ -90065,7 +90614,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L293-L300" + "specLocation": "_types/mapping/core.ts#L297-L304" }, { "inherits": { @@ -90124,6 +90673,28 @@ } } }, + { + "name": "script", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Script", + "namespace": "_types" + } + } + }, + { + "name": "on_script_error", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OnScriptError", + "namespace": "_types.mapping" + } + } + }, { "name": "null_value", "required": false, @@ -90155,7 +90726,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L78-L86" + "specLocation": "_types/mapping/core.ts#L80-L90" }, { "inherits": { @@ -90225,6 +90796,28 @@ } } }, + { + "name": "script", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Script", + "namespace": "_types" + } + } + }, + { + "name": "on_script_error", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OnScriptError", + "namespace": "_types.mapping" + } + } + }, { "name": "null_value", "required": false, @@ -90267,7 +90860,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L66-L76" + "specLocation": "_types/mapping/core.ts#L66-L78" }, { "inherits": { @@ -90824,7 +91417,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L206-L210" + "specLocation": "_types/mapping/core.ts#L210-L214" }, { "inherits": { @@ -90848,7 +91441,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L202-L204" + "specLocation": "_types/mapping/core.ts#L206-L208" 
}, { "inherits": { @@ -91691,7 +92284,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L172-L175" + "specLocation": "_types/mapping/core.ts#L176-L179" }, { "inherits": { @@ -91814,7 +92407,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L115-L135" + "specLocation": "_types/mapping/core.ts#L119-L139" }, { "inherits": { @@ -91849,7 +92442,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L152-L155" + "specLocation": "_types/mapping/core.ts#L156-L159" }, { "inherits": { @@ -91884,7 +92477,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L142-L145" + "specLocation": "_types/mapping/core.ts#L146-L149" }, { "inherits": { @@ -91919,7 +92512,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L147-L150" + "specLocation": "_types/mapping/core.ts#L151-L154" }, { "inherits": { @@ -91954,7 +92547,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L157-L160" + "specLocation": "_types/mapping/core.ts#L161-L164" }, { "inherits": { @@ -91989,7 +92582,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L162-L165" + "specLocation": "_types/mapping/core.ts#L166-L169" }, { "inherits": { @@ -92035,7 +92628,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L182-L186" + "specLocation": "_types/mapping/core.ts#L186-L190" }, { "inherits": { @@ -92070,7 +92663,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L167-L170" + "specLocation": "_types/mapping/core.ts#L171-L174" }, { "inherits": { @@ -92105,7 +92698,7 @@ } } ], - "specLocation": "_types/mapping/core.ts#L177-L180" + "specLocation": "_types/mapping/core.ts#L181-L184" }, { "inherits": { @@ -97194,7 +97787,7 @@ "name": "DenseByteVector", "namespace": "inference._types" }, - "specLocation": "inference/_types/Results.ts#L40-L44", + "specLocation": "inference/_types/Results.ts#L47-L51", "type": { "kind": "array_of", "value": { @@ -97288,13 +97881,16 @@ }, { "name": "completion" + }, + { + "name": "chat_completion" } ], "name": { "name": "TaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L20-L28" + "specLocation": "inference/_types/TaskType.ts#L20-L29" }, { "codegenNames": [ @@ -97304,9 +97900,9 @@ "kind": "type_alias", "name": { "name": "CompletionToolType", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L97-L100", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", "type": { "items": [ { @@ -97320,7 +97916,7 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } ], @@ -97332,7 +97928,7 @@ "kind": "interface", "name": { "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -97355,19 +97951,19 @@ "kind": "instance_of", "type": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L186-L198" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" }, { "description": "The tool choice function.", "kind": "interface", "name": { "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -97383,7 +97979,7 @@ } } ], - "specLocation": 
"inference/unified_inference/UnifiedRequest.ts#L175-L184" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" }, { "codegenNames": [ @@ -97393,9 +97989,9 @@ "kind": "type_alias", "name": { "name": "MessageContent", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L148-L151", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", "type": { "items": [ { @@ -97411,7 +98007,7 @@ "kind": "instance_of", "type": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" } } } @@ -97424,7 +98020,7 @@ "kind": "interface", "name": { "name": "ContentObject", - "namespace": "inference.unified_inference" + "namespace": "inference.chat_completion_unified" }, "properties": [ { @@ -97452,7 +98048,33 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L102-L114" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + }, + { + "kind": "enum", + "members": [ + { + "name": "watsonxai" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L76-L78" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + } + ], + "name": { + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" + }, + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L72-L74" }, { "kind": "enum", @@ -117688,7 +118310,7 @@ } }, "kind": "dictionary_of", - "singleKey": false, + "singleKey": true, "value": { "kind": "instance_of", "type": { @@ -123848,6 +124470,287 @@ ], "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L32-L37" }, + { + "description": "An object representing part of the conversation.", + "kind": "interface", + "name": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The content of the message.", + "name": "content", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "MessageContent", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "The role of the message author.", + "name": "role", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool call that this message is responding to.", + "name": "tool_call_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The tool calls generated by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ToolCall", + "namespace": "inference.chat_completion_unified" + } + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + }, + { + "description": "A tool call generated by the model.", + "kind": "interface", + "name": { + "name": "ToolCall", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The identifier of the tool call.", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The function that the model called.", + "name": "function", + 
"required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ToolCallFunction", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "The type of the tool call.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + }, + { + "description": "The function that the model called.", + "kind": "interface", + "name": { + "name": "ToolCallFunction", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The arguments to call the function with in JSON format.", + "name": "arguments", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + }, + { + "description": "A list of tools that the model can call.", + "kind": "interface", + "name": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The type of tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function definition.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolFunction", + "namespace": "inference.chat_completion_unified" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + }, + { + "description": "The completion tool function definition.", + "kind": "interface", + "name": { + "name": "CompletionToolFunction", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "name": "description", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", + "name": "parameters", + "required": false, + "type": { + "kind": "user_defined_value" + } + }, + { + "description": "Whether to enable schema adherence when generating the function call.", + "name": "strict", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" + }, + { + "description": "Defines the completion result.", + "kind": "interface", + "name": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "completion", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L84-L89" + }, + { + "description": "The completion result object", + "kind": "interface", + "name": { + "name": "CompletionResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "result", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L77-L82" + }, { "description": "Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint", "inherits": { @@ -123877,7 +124780,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L92-L97" + "specLocation": "inference/_types/Results.ts#L110-L115" }, { "description": "Represents an inference endpoint as returned by the GET API", @@ -124049,189 +124952,139 @@ "specLocation": "inference/_types/Services.ts#L60-L90" }, { - "description": "InferenceResult is an aggregation of mutually exclusive variants", "kind": "interface", "name": { - "name": "InferenceResult", - "namespace": "inference._types" + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" }, "properties": [ { - "name": "text_embedding_bytes", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "text_embedding_bits", - "required": false, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "watsonx-api-keys", + "extDocUrl": "https://cloud.ibm.com/iam/apikeys", + "name": "api_key", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "name": "text_embedding", - "required": false, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", + "extDocId": "watsonx-api-version", + "extDocUrl": 
"https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates", + "name": "api_version", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "name": "sparse_embedding", - "required": false, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "extDocId": "watsonx-api-models", + "extDocUrl": "https://www.ibm.com/products/watsonx-ai/foundation-models", + "name": "model_id", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "name": "completion", - "required": false, + "description": "The identifier of the IBM Cloud project to use for the inference task.", + "name": "project_id", + "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionResult", - "namespace": "inference._types" - } + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" } } }, { - "name": "rerank", + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "RankedDocument", - "namespace": "inference._types" - } - } - } - } - ], - "specLocation": "inference/_types/Results.ts#L79-L90", - "variants": { - "kind": "container" - } - }, - { - "description": "The text embedding result object for byte representation", - "kind": "interface", - "name": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - }, - "properties": [ - { - "name": "embedding", - "required": true, "type": { "kind": "instance_of", "type": { - "name": "DenseByteVector", + "name": "RateLimitSetting", "namespace": "inference._types" } } - } - ], - "specLocation": "inference/_types/Results.ts#L46-L51" - }, - { - "description": "The text embedding result object", - "kind": "interface", - "name": { - "name": "TextEmbeddingResult", - "namespace": "inference._types" - }, - "properties": [ + }, { - "name": "embedding", + "description": "The URL of the inference endpoint that you created on Watsonx.", + "name": "url", "required": true, "type": { "kind": "instance_of", "type": { - "name": "DenseVector", - "namespace": "inference._types" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/_types/Results.ts#L53-L58" + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L80-L117" }, { "kind": "interface", "name": { - "name": "SparseEmbeddingResult", + "name": "RateLimitSetting", "namespace": "inference._types" }, "properties": [ { - "name": "embedding", - "required": true, + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "SparseVector", - "namespace": "inference._types" + "name": "integer", + "namespace": "_types" } } } ], - "specLocation": 
"inference/_types/Results.ts#L36-L38" + "specLocation": "inference/_types/Services.ts#L96-L101" }, { - "description": "The completion result object", + "description": "Defines the response for a rerank request.", "kind": "interface", "name": { - "name": "CompletionResult", + "name": "RerankedInferenceResult", "namespace": "inference._types" }, "properties": [ { - "name": "result", + "name": "rerank", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "RankedDocument", + "namespace": "inference._types" + } } } } ], - "specLocation": "inference/_types/Results.ts#L60-L65" + "specLocation": "inference/_types/Results.ts#L103-L108" }, { "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested", @@ -124275,241 +125128,153 @@ } } ], - "specLocation": "inference/_types/Results.ts#L67-L77" + "specLocation": "inference/_types/Results.ts#L91-L101" }, { - "description": "An object representing part of the conversation.", + "description": "The response format for the sparse embedding request.", "kind": "interface", "name": { - "name": "Message", - "namespace": "inference.unified_inference" + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The content of the message.", - "name": "content", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "MessageContent", - "namespace": "inference.unified_inference" - } - } - }, - { - "description": "The role of the message author.", - "name": "role", + "name": "sparse_embedding", "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The tool call that this message is responding to.", - "name": "tool_call_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The tool calls generated by the model.", - "name": "tool_calls", - "required": false, "type": { "kind": "array_of", "value": { "kind": "instance_of", "type": { - "name": "ToolCall", - "namespace": "inference.unified_inference" + "name": "SparseEmbeddingResult", + "namespace": "inference._types" } } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L153-L173" + "specLocation": "inference/_types/Results.ts#L40-L45" }, { - "description": "A tool call generated by the model.", "kind": "interface", "name": { - "name": "ToolCall", - "namespace": "inference.unified_inference" + "name": "SparseEmbeddingResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The identifier of the tool call.", - "name": "id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The function that the model called.", - "name": "function", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ToolCallFunction", - "namespace": "inference.unified_inference" - } - } - }, - { - "description": "The type of the tool call.", - "name": "type", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": 
"_builtins" + "name": "SparseVector", + "namespace": "inference._types" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L130-L146" + "specLocation": "inference/_types/Results.ts#L36-L38" }, { - "description": "The function that the model called.", + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "kind": "interface", "name": { - "name": "ToolCallFunction", - "namespace": "inference.unified_inference" + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The arguments to call the function with in JSON format.", - "name": "arguments", - "required": true, + "name": "text_embedding_bytes", + "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } } } }, { - "description": "The name of the function to call.", - "name": "name", - "required": true, + "name": "text_embedding_bits", + "required": false, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } + } + } + }, + { + "name": "text_embedding", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + } } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L116-L128" + "specLocation": "inference/_types/Results.ts#L67-L75", + "variants": { + "kind": "container" + } }, { - "description": "A list of tools that the model can call.", + "description": "The text embedding result object for byte representation", "kind": "interface", "name": { - "name": "CompletionTool", - "namespace": "inference.unified_inference" + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" }, "properties": [ { - "description": "The type of tool.", - "name": "type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The function definition.", - "name": "function", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "name": "DenseByteVector", + "namespace": "inference._types" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L223-L235" + "specLocation": "inference/_types/Results.ts#L53-L58" }, { - "description": "The completion tool function definition.", + "description": "The text embedding result object", "kind": "interface", "name": { - "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "name": "TextEmbeddingResult", + "namespace": "inference._types" }, "properties": [ { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "name": "description", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the function.", - "name": "name", + "name": "embedding", "required": true, "type": { "kind": "instance_of", "type": { - "name": "string", - 
"namespace": "_builtins" - } - } - }, - { - "description": "The parameters the functional accepts. This should be formatted as a JSON object.", - "name": "parameters", - "required": false, - "type": { - "kind": "user_defined_value" - } - }, - { - "description": "Whether to enable schema adherence when generating the function call.", - "name": "strict", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" + "name": "DenseVector", + "namespace": "inference._types" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L200-L221" + "specLocation": "inference/_types/Results.ts#L60-L65" }, { "kind": "interface",