From 8b02e55acf9255ba55d9b4dec3b33f4d638daf2c Mon Sep 17 00:00:00 2001 From: kosabogi Date: Tue, 3 Jun 2025 14:52:03 +0200 Subject: [PATCH 1/3] Adds custom InferenceEndpointInfo classes --- specification/inference/_types/Services.ts | 170 +++++++++++++++++- specification/inference/_types/TaskType.ts | 68 +++++++ .../chat_completion_unified/UnifiedRequest.ts | 3 - .../PutAmazonBedrockResponse.ts | 4 +- .../put_anthropic/PutAnthropicResponse.ts | 4 +- .../PutAzureAiStudioResponse.ts | 4 +- .../put_azureopenai/PutAzureOpenAiResponse.ts | 4 +- .../inference/put_cohere/PutCohereResponse.ts | 4 +- .../PutElasticsearchResponse.ts | 4 +- .../inference/put_elser/PutElserResponse.ts | 4 +- .../PutGoogleAiStudioResponse.ts | 4 +- .../PutGoogleVertexAiResponse.ts | 4 +- .../PutHuggingFaceResponse.ts | 4 +- .../put_mistral/PutMistralResponse.ts | 4 +- .../inference/put_openai/PutOpenAiResponse.ts | 4 +- .../put_voyageai/PutVoyageAIResponse.ts | 4 +- .../put_watsonx/PutWatsonxResponse.ts | 4 +- 17 files changed, 265 insertions(+), 32 deletions(-) diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index 63ef6bc1b8..56d51983f7 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -22,7 +22,21 @@ import { integer } from '@_types/Numeric' import { TaskType, TaskTypeAlibabaCloudAI, - TaskTypeJinaAi + TaskTypeAmazonBedrock, + TaskTypeAnthropic, + TaskTypeAzureAIStudio, + TaskTypeAzureOpenAI, + TaskTypeCohere, + TaskTypeElasticsearch, + TaskTypeELSER, + TaskTypeGoogleAIStudio, + TaskTypeGoogleVertexAI, + TaskTypeHuggingFace, + TaskTypeJinaAi, + TaskTypeMistral, + TaskTypeOpenAI, + TaskTypeVoyageAI, + TaskTypeWatsonx } from '../_types/TaskType' /** @@ -83,6 +97,160 @@ export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint { task_type: TaskTypeAlibabaCloudAI } +export class InferenceEndpointInfoAmazonBedrock extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeAmazonBedrock +} + +export class InferenceEndpointInfoAnthropic extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeAnthropic +} + +export class InferenceEndpointInfoAzureAIStudio extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeAzureAIStudio +} + +export class InferenceEndpointInfoAzureOpenAI extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeAzureOpenAI +} + +export class InferenceEndpointInfoCohere extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeCohere +} + +export class InferenceEndpointInfoElasticsearch extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeElasticsearch +} + +export class InferenceEndpointInfoELSER extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeELSER +} + +export class InferenceEndpointInfoGoogleAIStudio extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeGoogleAIStudio +} + +export class InferenceEndpointInfoGoogleVertexAI extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeGoogleVertexAI +} + +export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeHuggingFace +} + +export class InferenceEndpointInfoMistral extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeMistral +} + +export class InferenceEndpointInfoOpenAI extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeOpenAI +} + +export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeVoyageAI +} + +export class InferenceEndpointInfoWatsonx extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeWatsonx +} + /** * Chunking configuration object */ diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 5025ef40ba..7c36323488 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -39,3 +39,71 @@ export enum TaskTypeAlibabaCloudAI { completion, sparse_embedding } + +export enum TaskTypeAmazonBedrock { + text_embedding, + completion +} + +export enum TaskTypeAnthropic { + completion +} + +export enum TaskTypeAzureAIStudio { + text_embedding, + completion +} + +export enum TaskTypeAzureOpenAI { + text_embedding, + completion +} + +export enum TaskTypeCohere { + text_embedding, + rerank, + completion +} + +export enum TaskTypeElasticsearch { + sparse_embedding, + text_embedding, + rerank +} + +export enum TaskTypeELSER { + sparse_embedding +} + +export enum TaskTypeGoogleAIStudio { + text_embedding, + completion +} + +export enum TaskTypeGoogleVertexAI { + text_embedding, + rerank +} + +export enum TaskTypeHuggingFace { + text_embedding +} + +export enum TaskTypeMistral { + text_embedding +} + +export enum TaskTypeOpenAI { + text_embedding, + chat_completion, + completion +} + +export enum TaskTypeVoyageAI { + text_embedding, + rerank +} + +export enum TaskTypeWatsonx { + text_embedding +} diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 3a5498defd..78e0b7912f 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -26,9 +26,6 @@ import { Duration } from '@_types/Time' * * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. * It only works with the `chat_completion` task type for `openai` and `elastic` inference services. - - * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. - * For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. * * NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion inference API and the Stream inference API differ in their response structure and capabilities. diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts index 581909e650..128ff4dc55 100644 --- a/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoAmazonBedrock } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoAmazonBedrock } diff --git a/specification/inference/put_anthropic/PutAnthropicResponse.ts b/specification/inference/put_anthropic/PutAnthropicResponse.ts index 581909e650..5a4803bc4e 100644 --- a/specification/inference/put_anthropic/PutAnthropicResponse.ts +++ b/specification/inference/put_anthropic/PutAnthropicResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoAnthropic } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoAnthropic } diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts index 581909e650..95201ae136 100644 --- a/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoAzureAIStudio } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoAzureAIStudio } diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts index 581909e650..7632f9367e 100644 --- a/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts +++ b/specification/inference/put_azureopenai/PutAzureOpenAiResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoAzureOpenAI } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoAzureOpenAI } diff --git a/specification/inference/put_cohere/PutCohereResponse.ts b/specification/inference/put_cohere/PutCohereResponse.ts index 581909e650..23267459a4 100644 --- a/specification/inference/put_cohere/PutCohereResponse.ts +++ b/specification/inference/put_cohere/PutCohereResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoCohere } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoCohere } diff --git a/specification/inference/put_elasticsearch/PutElasticsearchResponse.ts b/specification/inference/put_elasticsearch/PutElasticsearchResponse.ts index 581909e650..4da94ebfd1 100644 --- a/specification/inference/put_elasticsearch/PutElasticsearchResponse.ts +++ b/specification/inference/put_elasticsearch/PutElasticsearchResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoElasticsearch } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoElasticsearch } diff --git a/specification/inference/put_elser/PutElserResponse.ts b/specification/inference/put_elser/PutElserResponse.ts index 581909e650..abc9b6b87e 100644 --- a/specification/inference/put_elser/PutElserResponse.ts +++ b/specification/inference/put_elser/PutElserResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoELSER } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoELSER } diff --git a/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts b/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts index 581909e650..da6f354183 100644 --- a/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts +++ b/specification/inference/put_googleaistudio/PutGoogleAiStudioResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoGoogleAIStudio } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoGoogleAIStudio } diff --git a/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts b/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts index 581909e650..b37783d810 100644 --- a/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts +++ b/specification/inference/put_googlevertexai/PutGoogleVertexAiResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoGoogleVertexAI } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoGoogleVertexAI } diff --git a/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts index 581909e650..0842c67ebd 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoHuggingFace } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoHuggingFace } diff --git a/specification/inference/put_mistral/PutMistralResponse.ts b/specification/inference/put_mistral/PutMistralResponse.ts index 581909e650..54fc28dd08 100644 --- a/specification/inference/put_mistral/PutMistralResponse.ts +++ b/specification/inference/put_mistral/PutMistralResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoMistral } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoMistral } diff --git a/specification/inference/put_openai/PutOpenAiResponse.ts b/specification/inference/put_openai/PutOpenAiResponse.ts index 581909e650..2b3b8ac826 100644 --- a/specification/inference/put_openai/PutOpenAiResponse.ts +++ b/specification/inference/put_openai/PutOpenAiResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoOpenAI } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoOpenAI } diff --git a/specification/inference/put_voyageai/PutVoyageAIResponse.ts b/specification/inference/put_voyageai/PutVoyageAIResponse.ts index 581909e650..6f9d929762 100644 --- a/specification/inference/put_voyageai/PutVoyageAIResponse.ts +++ b/specification/inference/put_voyageai/PutVoyageAIResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoVoyageAI } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoVoyageAI } diff --git a/specification/inference/put_watsonx/PutWatsonxResponse.ts b/specification/inference/put_watsonx/PutWatsonxResponse.ts index 581909e650..6f2ad0aa0c 100644 --- a/specification/inference/put_watsonx/PutWatsonxResponse.ts +++ b/specification/inference/put_watsonx/PutWatsonxResponse.ts @@ -17,9 +17,9 @@ * under the License. */ -import { InferenceEndpointInfo } from '@inference/_types/Services' +import { InferenceEndpointInfoWatsonx } from '@inference/_types/Services' export class Response { /** @codegen_name endpoint_info */ - body: InferenceEndpointInfo + body: InferenceEndpointInfoWatsonx } From e1eb6d559b257683aa53cf097b16c96f852a4e41 Mon Sep 17 00:00:00 2001 From: kosabogi Date: Tue, 3 Jun 2025 15:03:34 +0200 Subject: [PATCH 2/3] Code style fix --- .../inference/chat_completion_unified/UnifiedRequest.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 78e0b7912f..0fb996537e 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -23,10 +23,10 @@ import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** * Perform chat completion inference - * - * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. + * + * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. * It only works with the `chat_completion` task type for `openai` and `elastic` inference services. - * + * * NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion inference API and the Stream inference API differ in their response structure and capabilities. * The Chat completion inference API provides more comprehensive customization options through more fields and function calling support. From 081147ba218ee52f994f548e738c25d2c3a69ae5 Mon Sep 17 00:00:00 2001 From: kosabogi Date: Thu, 5 Jun 2025 08:18:57 +0200 Subject: [PATCH 3/3] Code style fix --- .../inference/chat_completion_unified/UnifiedRequest.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index a17072203b..0fb996537e 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -26,7 +26,7 @@ import { Duration } from '@_types/Time' * * The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. * It only works with the `chat_completion` task type for `openai` and `elastic` inference services. - * + * * NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion inference API and the Stream inference API differ in their response structure and capabilities. * The Chat completion inference API provides more comprehensive customization options through more fields and function calling support.