Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion specification/inference/_types/CommonTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ export class AlibabaCloudTaskSettings {
export enum AlibabaCloudTaskType {
completion,
rerank,
space_embedding,
sparse_embedding,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😁

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧑‍🚀

text_embedding
}

Expand Down
1 change: 1 addition & 0 deletions specification/inference/delete/DeleteRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'

/**
* Delete an inference endpoint
* This API requires the `manage_inference` cluster privilege (the built-in `inference_admin` role grants this privilege).
* @rest_spec_name inference.delete
* @availability stack since=8.11.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
Expand Down
1 change: 1 addition & 0 deletions specification/inference/get/GetRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { TaskType } from '@inference/_types/TaskType'

/**
* Get an inference endpoint
* This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege).
* @rest_spec_name inference.get
* @availability stack since=8.11.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
Expand Down
7 changes: 7 additions & 0 deletions specification/inference/put/PutRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ import { TaskType } from '@inference/_types/TaskType'
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
* * VoyageAI (`rerank`, `text_embedding`)
* * Watsonx inference integration (`text_embedding`)
*
* NOTE: When creating an inference endpoint, the associated machine learning model is automatically deployed if it is not
* already running. After creating the endpoint, wait for the model deployment to complete before using it. You can verify
* the deployment status by using the Get trained model statistics API. In the response, look for `"state": "fully_allocated"`
* and ensure the `allocation_count` matches the `target_allocation_count`. Avoid creating multiple endpoints for the same
* model unless required, as each endpoint consumes significant resources.
*
* @rest_spec_name inference.put
* @availability stack since=8.11.0 stability=stable visibility=public
* @availability serverless stability=stable visibility=public
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_anthropic/PutAnthropicRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_cohere/PutCohereRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_custom/PutCustomRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_deepseek/PutDeepSeekRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_elser/PutElserRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* Note that for ELSER endpoints, the `max_chunk_size` may not exceed `300`.
* @ext_doc_id inference-chunking
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_jinaai/PutJinaAiRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_llama/PutLlamaRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_mistral/PutMistralRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_openai/PutOpenAiRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down
2 changes: 2 additions & 0 deletions specification/inference/put_voyageai/PutVoyageAIRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ export interface Request extends RequestBase {
body: {
/**
* The chunking configuration object.
* Applies only to the `sparse_embedding` and `text_embedding` task types.
* Not applicable to the `rerank`, `completion`, or `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
Expand Down