From 37289fc3638c347627bea9978005dcfc640ea712 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Tue, 4 Nov 2025 15:42:28 +0100 Subject: [PATCH] Fixes descriptions in the Inference APIs (#5566) * Improves descriptions in the Inference APIs * Update specification/inference/delete/DeleteRequest.ts Co-authored-by: Liam Thompson * Update specification/inference/put/PutRequest.ts Co-authored-by: David Kyle * Fixes chunking_settings object descriptions * Fixes oxford comma * Formatting fix --------- Co-authored-by: Liam Thompson Co-authored-by: David Kyle (cherry picked from commit 518ccd9a1ab34d9410c9e988fc23a3b921252f71) --- specification/inference/_types/CommonTypes.ts | 2 +- specification/inference/delete/DeleteRequest.ts | 1 + specification/inference/get/GetRequest.ts | 1 + .../inference/put_alibabacloud/PutAlibabaCloudRequest.ts | 2 ++ .../put_amazonbedrock/PutAmazonBedrockRequest.ts | 2 ++ .../put_amazonsagemaker/PutAmazonSageMakerRequest.ts | 2 ++ .../inference/put_anthropic/PutAnthropicRequest.ts | 6 ------ .../put_azureaistudio/PutAzureAiStudioRequest.ts | 2 ++ .../inference/put_azureopenai/PutAzureOpenAiRequest.ts | 2 ++ specification/inference/put_cohere/PutCohereRequest.ts | 2 ++ .../inference/put_contextualai/PutContextualAiRequest.ts | 6 ------ specification/inference/put_custom/PutCustomRequest.ts | 2 ++ .../inference/put_deepseek/PutDeepSeekRequest.ts | 6 ------ .../put_elasticsearch/PutElasticsearchRequest.ts | 2 +- .../put_googleaistudio/PutGoogleAiStudioRequest.ts | 2 ++ .../put_googlevertexai/PutGoogleVertexAiRequest.ts | 2 ++ .../inference/put_hugging_face/PutHuggingFaceRequest.ts | 2 ++ specification/inference/put_jinaai/PutJinaAiRequest.ts | 2 ++ specification/inference/put_llama/PutLlamaRequest.ts | 2 ++ specification/inference/put_mistral/PutMistralRequest.ts | 2 ++ specification/inference/put_openai/PutOpenAiRequest.ts | 2 ++ .../inference/put_voyageai/PutVoyageAIRequest.ts | 2 ++ 
specification/inference/put_watsonx/PutWatsonxRequest.ts | 8 ++++++++ 23 files changed, 42 insertions(+), 20 deletions(-) diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 10f45fd4d7..801e824595 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -391,7 +391,7 @@ export class AlibabaCloudTaskSettings { export enum AlibabaCloudTaskType { completion, rerank, - space_embedding, + sparse_embedding, text_embedding } diff --git a/specification/inference/delete/DeleteRequest.ts b/specification/inference/delete/DeleteRequest.ts index c12bcbd16c..2b4e3b0d25 100644 --- a/specification/inference/delete/DeleteRequest.ts +++ b/specification/inference/delete/DeleteRequest.ts @@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType' /** * Delete an inference endpoint + * This API requires the `manage_inference` cluster privilege (the built-in `inference_admin` role grants this privilege). * @rest_spec_name inference.delete * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/get/GetRequest.ts b/specification/inference/get/GetRequest.ts index a9523a8878..ac6356e9eb 100644 --- a/specification/inference/get/GetRequest.ts +++ b/specification/inference/get/GetRequest.ts @@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType' /** * Get an inference endpoint + * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
* @rest_spec_name inference.get * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts index c725397056..b2ce40073a 100644 --- a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts +++ b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts @@ -65,6 +65,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `sparse_embedding` or `text_embedding` task types. + * Not applicable to the `rerank` or `completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts index 61927ce3bf..e87d687d88 100644 --- a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts @@ -68,6 +68,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` task type. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts index 1b020e5fc8..b91ebdcf5a 100644 --- a/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts +++ b/specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts @@ -65,6 +65,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. 
+ * Applies only to the `sparse_embedding` or `text_embedding` task types. + * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_anthropic/PutAnthropicRequest.ts b/specification/inference/put_anthropic/PutAnthropicRequest.ts index d7942495c3..afbf09dec1 100644 --- a/specification/inference/put_anthropic/PutAnthropicRequest.ts +++ b/specification/inference/put_anthropic/PutAnthropicRequest.ts @@ -26,7 +26,6 @@ import { AnthropicTaskSettings, AnthropicTaskType } from '@inference/_types/CommonTypes' -import { InferenceChunkingSettings } from '@inference/_types/Services' /** * Create an Anthropic inference endpoint. @@ -64,11 +63,6 @@ export interface Request extends RequestBase { timeout?: Duration } body: { - /** - * The chunking configuration object. - * @ext_doc_id inference-chunking - */ - chunking_settings?: InferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `anthropic`. */ diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts index 6ab0d8b029..2f38907138 100644 --- a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts @@ -65,6 +65,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank` or `completion` task types. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts index af881b68ef..dd72ba63d8 100644 --- a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts +++ b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts @@ -73,6 +73,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` task type. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_cohere/PutCohereRequest.ts b/specification/inference/put_cohere/PutCohereRequest.ts index 6ebfc47292..ef267defa5 100644 --- a/specification/inference/put_cohere/PutCohereRequest.ts +++ b/specification/inference/put_cohere/PutCohereRequest.ts @@ -65,6 +65,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank` or `completion` task types. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_contextualai/PutContextualAiRequest.ts b/specification/inference/put_contextualai/PutContextualAiRequest.ts index fefd3fb051..d52ac070b8 100644 --- a/specification/inference/put_contextualai/PutContextualAiRequest.ts +++ b/specification/inference/put_contextualai/PutContextualAiRequest.ts @@ -25,7 +25,6 @@ import { ContextualAIServiceType, ContextualAITaskSettings } from '@inference/_types/CommonTypes' -import { InferenceChunkingSettings } from '@inference/_types/Services' import { TaskTypeContextualAI } from '@inference/_types/TaskType' /** @@ -65,11 +64,6 @@ export interface Request extends RequestBase { timeout?: Duration } body: { - /** - * The chunking configuration object. - * @ext_doc_id inference-chunking - */ - chunking_settings?: InferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `contextualai`. */ diff --git a/specification/inference/put_custom/PutCustomRequest.ts b/specification/inference/put_custom/PutCustomRequest.ts index f7dcf96d14..9d7ef981c1 100644 --- a/specification/inference/put_custom/PutCustomRequest.ts +++ b/specification/inference/put_custom/PutCustomRequest.ts @@ -96,6 +96,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `sparse_embedding` or `text_embedding` task types. + * Not applicable to the `rerank` or `completion` task types. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_deepseek/PutDeepSeekRequest.ts b/specification/inference/put_deepseek/PutDeepSeekRequest.ts index c188ffd857..7387aba59a 100644 --- a/specification/inference/put_deepseek/PutDeepSeekRequest.ts +++ b/specification/inference/put_deepseek/PutDeepSeekRequest.ts @@ -24,7 +24,6 @@ import { DeepSeekServiceSettings, DeepSeekServiceType } from '@inference/_types/CommonTypes' -import { InferenceChunkingSettings } from '@inference/_types/Services' import { TaskTypeDeepSeek } from '@inference/_types/TaskType' /** @@ -62,11 +61,6 @@ export interface Request extends RequestBase { timeout?: Duration } body: { - /** - * The chunking configuration object. - * @ext_doc_id inference-chunking - */ - chunking_settings?: InferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `deepseek`. */ diff --git a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts index b35720d1c7..74d51feb19 100644 --- a/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts +++ b/specification/inference/put_elasticsearch/PutElasticsearchRequest.ts @@ -80,7 +80,7 @@ export interface Request extends RequestBase { /** * The chunking configuration object. * Applies only to the `sparse_embedding` and `text_embedding` task types. - * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. + * Not applicable to the `rerank` task type. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts index 6871ceb750..c8c2f0f6a8 100644 --- a/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts +++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioRequest.ts @@ -64,6 +64,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` task type. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts index 80cf04059e..943faaf1dc 100644 --- a/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts +++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiRequest.ts @@ -65,6 +65,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index d4c16ffb84..62f47a6846 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -101,6 +101,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank`, `completion`, or `chat_completion` task types. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_jinaai/PutJinaAiRequest.ts b/specification/inference/put_jinaai/PutJinaAiRequest.ts index 6685c2c874..a6f048ccfb 100644 --- a/specification/inference/put_jinaai/PutJinaAiRequest.ts +++ b/specification/inference/put_jinaai/PutJinaAiRequest.ts @@ -68,6 +68,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank` task type. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_llama/PutLlamaRequest.ts b/specification/inference/put_llama/PutLlamaRequest.ts index 966f83cc19..d1542382d6 100644 --- a/specification/inference/put_llama/PutLlamaRequest.ts +++ b/specification/inference/put_llama/PutLlamaRequest.ts @@ -64,6 +64,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` or `chat_completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_mistral/PutMistralRequest.ts b/specification/inference/put_mistral/PutMistralRequest.ts index 3a7b5eaace..304351e87a 100644 --- a/specification/inference/put_mistral/PutMistralRequest.ts +++ b/specification/inference/put_mistral/PutMistralRequest.ts @@ -64,6 +64,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` or `chat_completion` task types. 
* @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_openai/PutOpenAiRequest.ts b/specification/inference/put_openai/PutOpenAiRequest.ts index b6bb675f46..48ec00c1ad 100644 --- a/specification/inference/put_openai/PutOpenAiRequest.ts +++ b/specification/inference/put_openai/PutOpenAiRequest.ts @@ -66,6 +66,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `completion` or `chat_completion` task types. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_voyageai/PutVoyageAIRequest.ts b/specification/inference/put_voyageai/PutVoyageAIRequest.ts index 2bb4b7ef46..99ec97c36b 100644 --- a/specification/inference/put_voyageai/PutVoyageAIRequest.ts +++ b/specification/inference/put_voyageai/PutVoyageAIRequest.ts @@ -67,6 +67,8 @@ export interface Request extends RequestBase { body: { /** * The chunking configuration object. + * Applies only to the `text_embedding` task type. + * Not applicable to the `rerank` task type. * @ext_doc_id inference-chunking */ chunking_settings?: InferenceChunkingSettings diff --git a/specification/inference/put_watsonx/PutWatsonxRequest.ts b/specification/inference/put_watsonx/PutWatsonxRequest.ts index b48fb3aa4c..86837a2254 100644 --- a/specification/inference/put_watsonx/PutWatsonxRequest.ts +++ b/specification/inference/put_watsonx/PutWatsonxRequest.ts @@ -25,6 +25,7 @@ import { WatsonxServiceType, WatsonxTaskType } from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' /** * Create a Watsonx inference endpoint. @@ -63,6 +64,13 @@ export interface Request extends RequestBase { timeout?: Duration } body: { + /** + * The chunking configuration object. + * Applies only to the `text_embedding` task type. 
+ * Not applicable to the `completion` or `chat_completion` task types. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings /** * The type of service supported for the specified task type. In this case, `watsonxai`. */