Skip to content

Commit aafd99f

Browse files
committed
Improves descriptions in the Inference APIs
1 parent c887822 commit aafd99f

File tree

23 files changed

+48
-1
lines changed

23 files changed

+48
-1
lines changed

specification/inference/_types/CommonTypes.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ export class AlibabaCloudTaskSettings {
391391
export enum AlibabaCloudTaskType {
392392
completion,
393393
rerank,
394-
space_embedding,
394+
sparse_embedding,
395395
text_embedding
396396
}
397397

specification/inference/delete/DeleteRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'
2323

2424
/**
2525
* Delete an inference endpoint
26+
* This API requires the `manage_inference` cluster privilege (the built-in `inference_admin` role grants this privilege).
2627
* @rest_spec_name inference.delete
2728
* @availability stack since=8.11.0 stability=stable visibility=public
2829
* @availability serverless stability=stable visibility=public

specification/inference/get/GetRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'
2323

2424
/**
2525
* Get an inference endpoint
26+
* This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).
2627
* @rest_spec_name inference.get
2728
* @availability stack since=8.11.0 stability=stable visibility=public
2829
* @availability serverless stability=stable visibility=public

specification/inference/put/PutRequest.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ import { TaskType } from '@inference/_types/TaskType'
5151
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
5252
* * VoyageAI (`rerank`, `text_embedding`)
5353
* * Watsonx inference integration (`text_embedding`)
54+
*
55+
* NOTE: When creating an inference endpoint, the associated machine learning model is automatically deployed if it is not
56+
* already running. After creating the endpoint, wait for the model deployment to complete before using it. You can verify
57+
* the deployment status by using the Get trained model statistics API. In the response, look for `"state": "fully_allocated"`
58+
* and ensure the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same
59+
* model unless required, as each endpoint consumes significant resources.
60+
*
5461
* @rest_spec_name inference.put
5562
* @availability stack since=8.11.0 stability=stable visibility=public
5663
* @availability serverless stability=stable visibility=public

specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ export interface Request extends RequestBase {
6868
body: {
6969
/**
7070
* The chunking configuration object.
71+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
72+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
7173
* @ext_doc_id inference-chunking
7274
*/
7375
chunking_settings?: InferenceChunkingSettings

specification/inference/put_amazonsagemaker/PutAmazonSageMakerRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_anthropic/PutAnthropicRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ export interface Request extends RequestBase {
6666
body: {
6767
/**
6868
* The chunking configuration object.
69+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
70+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6971
* @ext_doc_id inference-chunking
7072
*/
7173
chunking_settings?: InferenceChunkingSettings

specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface Request extends RequestBase {
6565
body: {
6666
/**
6767
* The chunking configuration object.
68+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
69+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
6870
* @ext_doc_id inference-chunking
6971
*/
7072
chunking_settings?: InferenceChunkingSettings

specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ export interface Request extends RequestBase {
7373
body: {
7474
/**
7575
* The chunking configuration object.
76+
* Applies only to the `sparse_embedding` and `text_embedding` task types.
77+
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
7678
* @ext_doc_id inference-chunking
7779
*/
7880
chunking_settings?: InferenceChunkingSettings

0 commit comments

Comments
 (0)