Skip to content

Commit aafd99f

Browse files
committed
Improves descriptions in the Inference APIs
1 parent c887822 commit aafd99f

File tree

23 files changed

+48
-1
lines changed

23 files changed

+48
-1
lines changed

specification/inference/_types/CommonTypes.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ export class AlibabaCloudTaskSettings {
391391
export enum AlibabaCloudTaskType {
392392
completion,
393393
rerank,
394-
space_embedding,
394+
sparse_embedding,
395395
text_embedding
396396
}
397397

specification/inference/delete/DeleteRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'
2323

2424
/**
2525
* Delete an inference endpoint
26+
* This API requires the `manage_inference` cluster privilege (the built-in `inference_admin` role grants this privilege).
2627
* @rest_spec_name inference.delete
2728
* @availability stack since=8.11.0 stability=stable visibility=public
2829
* @availability serverless stability=stable visibility=public

specification/inference/get/GetRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'
2323

2424
/**
2525
* Get an inference endpoint
26+
* This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).
2627
* @rest_spec_name inference.get
2728
* @availability stack since=8.11.0 stability=stable visibility=public
2829
* @availability serverless stability=stable visibility=public

specification/inference/put/PutRequest.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ import { TaskType } from '@inference/_types/TaskType'
5151
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
5252
* * VoyageAI (`rerank`, `text_embedding`)
5353
* * Watsonx inference integration (`text_embedding`)
54+
*
55+
* NOTE: When creating an inference endpoint, the associated machine learning model is automatically deployed if it is not
56+
* already running. After creating the endpoint, wait for the model deployment to complete before using it. You can verify
57+
* the deployment status by using the Get trained model statistics API. In the response, look for `"state": "fully_allocated"`
58+
* and ensure the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same
59+
* model unless required, as each endpoint consumes significant resources.
60+
*
5461
* @rest_spec_name inference.put
5562
* @availability stack since=8.11.0 stability=stable visibility=public
5663
* @availability serverless stability=stable visibility=public

specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ export interface Request extends RequestBase {
6868
body: {
6969
/**
7070
* The chunking configuration object.
71+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
72+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
7173
* @ext_doc_id inference-chunking
7274
*/
7375
chunking_settings?: InferenceChunkingSettings

specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_anthropic/PutAnthropicRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ export interface Request extends RequestBase {
6666
body: {
6767
/**
6868
* The chunking configuration object.
69+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
70+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6971
* @ext_doc_id inference-chunking
7072
*/
7173
chunking_settings?: InferenceChunkingSettings

specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ export interface Request extends RequestBase {
7373
body: {
7474
/**
7575
* The chunking configuration object.
76+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
77+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
7678
* @ext_doc_id inference-chunking
7779
*/
7880
chunking_settings?: InferenceChunkingSettings

0 commit comments

Comments
 (0)