Skip to content

Commit cef2af0

Browse files
committed
Contextual AI docs
1 parent 94b2f1a commit cef2af0

File tree

6 files changed

+194
-0
lines changed

6 files changed

+194
-0
lines changed

specification/inference/_types/CommonTypes.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,55 @@ export class CustomTaskSettings {
11951195
parameters?: UserDefinedValue
11961196
}
11971197

1198+
export enum ContextualAIServiceType {
1199+
contextualai
1200+
}
1201+
1202+
export class ContextualAIServiceSettings {
1203+
/**
1204+
* A valid API key for your Contexutual AI account.
1205+
*
1206+
* IMPORTANT: You need to provide the API key only once, during the inference model creation.
1207+
* The get inference endpoint API does not retrieve your API key.
1208+
* After creating the inference model, you cannot change the associated API key.
1209+
* If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
1210+
* @ext_doc_id contextualai-api-keys
1211+
*/
1212+
api_key: string
1213+
/**
1214+
* The name of the model to use for the inference task.
1215+
* Refer to the Contextual AI documentation for the list of available rerank models.
1216+
* @ext_doc_id contextualai-rerank
1217+
*/
1218+
model_id: string
1219+
/**
1220+
* This setting helps to minimize the number of rate limit errors returned from Contextual AI.
1221+
* The `contextualai` service sets a default number of requests allowed per minute depending on the task type.
1222+
* For `rerank`, it is set to `1000`.
1223+
*/
1224+
rate_limit?: RateLimitSetting
1225+
}
1226+
1227+
export class ContextualAITaskSettings {
1228+
/**
1229+
* Instructions for the reranking model. Refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-instruction>
1230+
* Only for the `rerank` task type.
1231+
*/
1232+
instruction?: string
1233+
/**
1234+
* Whether to return the source documents in the response.
1235+
* Only for the `rerank` task type.
1236+
* @server_default false
1237+
*/
1238+
return_documents?: boolean
1239+
/**
1240+
* The number of most relevant documents to return.
1241+
* If not specified, the reranking results of all documents will be returned.
1242+
* Only for the `rerank` task type.
1243+
*/
1244+
top_k?: integer
1245+
}
1246+
11981247
export class DeepSeekServiceSettings {
11991248
/**
12001249
* A valid API key for your DeepSeek account.

specification/inference/_types/Services.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
TaskTypeAzureAIStudio,
3030
TaskTypeAzureOpenAI,
3131
TaskTypeCohere,
32+
TaskTypeContextualAI,
3233
TaskTypeCustom,
3334
TaskTypeDeepSeek,
3435
TaskTypeElasticsearch,
@@ -168,6 +169,17 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint {
168169
task_type: TaskTypeCohere
169170
}
170171

172+
export class InferenceEndpointInfoContextualAi extends InferenceEndpoint {
173+
/**
174+
* The inference Id
175+
*/
176+
inference_id: string
177+
/**
178+
* The task type
179+
*/
180+
task_type: TaskTypeContextualAI
181+
}
182+
171183
export class InferenceEndpointInfoCustom extends InferenceEndpoint {
172184
/**
173185
* The inference Id
@@ -389,6 +401,7 @@ export class RateLimitSetting {
389401
* * `azureopenai` service and task type `text_embedding`: `1440`
390402
* * `azureopenai` service and task type `completion`: `120`
391403
* * `cohere` service: `10000`
404+
* * `contextualai` service: `1000`
392405
* * `elastic` service and task type `chat_completion`: `240`
393406
* * `googleaistudio` service: `360`
394407
* * `googlevertexai` service: `30000`

specification/inference/_types/TaskType.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ export enum TaskTypeCohere {
7979
completion
8080
}
8181

82+
export enum TaskTypeContextualAI {
83+
rerank
84+
}
85+
8286
export enum TaskTypeCustom {
8387
text_embedding,
8488
sparse_embedding,
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { RequestBase } from '@_types/Base'
21+
import { Id } from '@_types/common'
22+
import { Duration } from '@_types/Time'
23+
import { TaskTypeContextualAI } from '@inference/_types/TaskType'
24+
import {
25+
ContextualAIServiceSettings,
26+
ContextualAIServiceType,
27+
ContextualAITaskSettings,
28+
} from '@inference/_types/CommonTypes'
29+
import { InferenceChunkingSettings } from '@inference/_types/Services'
30+
31+
/**
32+
* Create an Contextual AI inference endpoint.
33+
*
34+
* Create an inference endpoint to perform an inference task with the `contexualai` service.
35+
*
36+
* To review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.
37+
* @rest_spec_name inference.put_contextualai
38+
* @availability stack since=9.2.0 stability=stable visibility=public
39+
* @availability serverless stability=stable visibility=public
40+
* @cluster_privileges manage_inference
41+
* @doc_id inference-api-put-contextualai
42+
*/
43+
export interface Request extends RequestBase {
44+
urls: [
45+
{
46+
path: '/_inference/{task_type}/{contextualai_inference_id}'
47+
methods: ['PUT']
48+
}
49+
]
50+
path_parts: {
51+
/**
52+
* The type of the inference task that the model will perform.
53+
*/
54+
task_type: TaskTypeContextualAI
55+
/**
56+
* The unique identifier of the inference endpoint.
57+
*/
58+
contextualai_inference_id: Id
59+
}
60+
query_parameters: {
61+
/**
62+
* Specifies the amount of time to wait for the inference endpoint to be created.
63+
* @server_default 30s
64+
*/
65+
timeout?: Duration
66+
}
67+
body: {
68+
/**
69+
* The chunking configuration object.
70+
* @ext_doc_id inference-chunking
71+
*/
72+
chunking_settings?: InferenceChunkingSettings
73+
/**
74+
* The type of service supported for the specified task type. In this case, `contextualai`.
75+
*/
76+
service: ContextualAIServiceType
77+
/**
78+
* Settings used to install the inference model. These settings are specific to the `contextualai` service.
79+
*/
80+
service_settings: ContextualAIServiceSettings
81+
/**
82+
* Settings to configure the inference task.
83+
* These settings are specific to the task type you specified.
84+
*/
85+
task_settings?: ContextualAITaskSettings
86+
}
87+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { InferenceEndpointInfoContextualAi } from '@inference/_types/Services'
21+
22+
export class Response {
23+
/** @codegen_name endpoint_info */
24+
body: InferenceEndpointInfoContextualAi
25+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
summary: A rerank task
description: Run `PUT _inference/rerank/contextualai-rerank` to create an inference endpoint for rerank tasks using the Contextual AI service.
method_request: 'PUT _inference/rerank/contextualai-rerank'
# type: "request"
value: |-
  {
    "service": "contextualai",
    "service_settings": {
      "api_key": "ContextualAI-Api-key",
      "model_id": "ctxl-rerank-v2-instruct-multilingual-mini"
    },
    "task_settings": {
      "instruction": "Rerank the following documents based on their relevance to the query.",
      "top_k": 3
    }
  }

0 commit comments

Comments
 (0)