From e173e376626b72b76f6e1cac4ed8b8007a77b92b Mon Sep 17 00:00:00 2001
From: David Kyle <david.kyle@elastic.co>
Date: Thu, 2 Oct 2025 14:31:17 +0100
Subject: [PATCH] Add Contextual AI Inference service docs (#5383)

* Contextual AI docs

* Fix CI

---------

Co-authored-by: Quentin Pradet <quentin.pradet@elastic.co>
(cherry picked from commit b18318c3804c3c063e5fe26926cf90d93ec81523)
---
 output/openapi/elasticsearch-openapi.json     | 205 ++++++-
 .../elasticsearch-serverless-openapi.json     | 205 ++++++-
 output/schema/schema.json                     | 517 +++++++++++++++---
 output/typescript/types.ts                    |  35 ++
 specification/_doc_ids/table.csv              |   1 +
 .../inference.put_contextualai.json           |  35 ++
 specification/inference/_types/CommonTypes.ts |  49 ++
 specification/inference/_types/Services.ts    |  13 +
 specification/inference/_types/TaskType.ts    |   4 +
 .../PutContextualAiRequest.ts                 |  87 +++
 .../PutContextualAiResponse.ts                |  25 +
 .../PutContextualAiRequestExample1.yaml       |  16 +
 12 files changed, 1105 insertions(+), 87 deletions(-)
 create mode 100644 specification/_json_spec/inference.put_contextualai.json
 create mode 100644 specification/inference/put_contextualai/PutContextualAiRequest.ts
 create mode 100644 specification/inference/put_contextualai/PutContextualAiResponse.ts
 create mode 100644 specification/inference/put_contextualai/examples/request/PutContextualAiRequestExample1.yaml

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index 93b0d630a5..76b5f3edaa 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -21888,6 +21888,126 @@
         ]
       }
     },
+    "/_inference/{task_type}/{contextualai_inference_id}": {
+      "put": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Create a Contextual AI inference endpoint",
+        "description": "Create an inference endpoint to perform an inference task with the `contextualai` service.\n\nTo review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n",
+        "operationId": "inference-put-contextualai",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "task_type",
+            "description": "The type of the inference task that the model will perform.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/inference._types.TaskTypeContextualAI"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "path",
+            "name": "contextualai_inference_id",
+            "description": "The unique identifier of the inference endpoint.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types.Id"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "query",
+            "name": "timeout",
+            "description": "Specifies the amount of time to wait for the inference endpoint to be created.",
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types.Duration"
+            },
+            "style": "form"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "chunking_settings": {
+                    "externalDocs": {
+                      "url": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config"
+                    },
+                    "description": "The chunking configuration object.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings"
+                      }
+                    ]
+                  },
+                  "service": {
+                    "description": "The type of service supported for the specified task type. In this case, `contextualai`.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAIServiceType"
+                      }
+                    ]
+                  },
+                  "service_settings": {
+                    "description": "Settings used to install the inference model. These settings are specific to the `contextualai` service.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAIServiceSettings"
+                      }
+                    ]
+                  },
+                  "task_settings": {
+                    "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAITaskSettings"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "service",
+                  "service_settings"
+                ]
+              },
+              "examples": {
+                "PutContextualAiRequestExample1": {
+                  "summary": "A rerank task",
+                  "description": "Run `PUT _inference/rerank/contextualai-rerank` to create an inference endpoint for rerank tasks using the Contextual AI service.",
+                  "value": "{\n    \"service\": \"contextualai\",\n    \"service_settings\": {\n        \"api_key\": \"ContextualAI-Api-key\",\n        \"model_id\": \"ctxl-rerank-v2-instruct-multilingual-mini\"\n    },\n    \"task_settings\": {\n        \"instruction\": \"Rerank the following documents based on their relevance to the query.\",\n        \"top_k\": 3\n    }\n}"
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoContextualAi"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Generally available; Added in 9.2.0",
+        "x-metaTags": [
+          {
+            "content": "Elasticsearch, Machine Learning",
+            "name": "product_name"
+          }
+        ]
+      }
+    },
     "/_inference/{task_type}/{custom_inference_id}": {
       "put": {
         "tags": [
@@ -100911,7 +101031,7 @@
         "type": "object",
         "properties": {
           "requests_per_minute": {
-            "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
+            "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
             "type": "number"
           }
         }
@@ -101833,6 +101953,89 @@
           "completion"
         ]
       },
+      "inference._types.TaskTypeContextualAI": {
+        "type": "string",
+        "enum": [
+          "rerank"
+        ]
+      },
+      "inference._types.ContextualAIServiceType": {
+        "type": "string",
+        "enum": [
+          "contextualai"
+        ]
+      },
+      "inference._types.ContextualAIServiceSettings": {
+        "type": "object",
+        "properties": {
+          "api_key": {
+            "description": "A valid API key for your Contextual AI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+            "type": "string"
+          },
+          "model_id": {
+            "description": "The name of the model to use for the inference task.\nRefer to the Contextual AI documentation for the list of available rerank models.",
+            "type": "string"
+          },
+          "rate_limit": {
+            "description": "This setting helps to minimize the number of rate limit errors returned from Contextual AI.\nThe `contextualai` service sets a default number of requests allowed per minute depending on the task type.\nFor `rerank`, it is set to `1000`.",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/inference._types.RateLimitSetting"
+              }
+            ]
+          }
+        },
+        "required": [
+          "api_key",
+          "model_id"
+        ]
+      },
+      "inference._types.ContextualAITaskSettings": {
+        "type": "object",
+        "properties": {
+          "instruction": {
+            "description": "Instructions for the reranking model. Refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-instruction>\nOnly for the `rerank` task type.",
+            "type": "string"
+          },
+          "return_documents": {
+            "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.",
+            "default": false,
+            "type": "boolean"
+          },
+          "top_k": {
+            "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.",
+            "type": "number"
+          }
+        }
+      },
+      "inference._types.InferenceEndpointInfoContextualAi": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/inference._types.InferenceEndpoint"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "inference_id": {
+                "description": "The inference Id",
+                "type": "string"
+              },
+              "task_type": {
+                "description": "The task type",
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/inference._types.TaskTypeContextualAI"
+                  }
+                ]
+              }
+            },
+            "required": [
+              "inference_id",
+              "task_type"
+            ]
+          }
+        ]
+      },
       "inference._types.CustomTaskType": {
         "type": "string",
         "enum": [
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 9189132494..95dcc42e0e 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -12872,6 +12872,126 @@
         ]
       }
     },
+    "/_inference/{task_type}/{contextualai_inference_id}": {
+      "put": {
+        "tags": [
+          "inference"
+        ],
+        "summary": "Create a Contextual AI inference endpoint",
+        "description": "Create an inference endpoint to perform an inference task with the `contextualai` service.\n\nTo review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n",
+        "operationId": "inference-put-contextualai",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "task_type",
+            "description": "The type of the inference task that the model will perform.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/inference._types.TaskTypeContextualAI"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "path",
+            "name": "contextualai_inference_id",
+            "description": "The unique identifier of the inference endpoint.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types.Id"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "query",
+            "name": "timeout",
+            "description": "Specifies the amount of time to wait for the inference endpoint to be created.",
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types.Duration"
+            },
+            "style": "form"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "chunking_settings": {
+                    "externalDocs": {
+                      "url": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config"
+                    },
+                    "description": "The chunking configuration object.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings"
+                      }
+                    ]
+                  },
+                  "service": {
+                    "description": "The type of service supported for the specified task type. In this case, `contextualai`.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAIServiceType"
+                      }
+                    ]
+                  },
+                  "service_settings": {
+                    "description": "Settings used to install the inference model. These settings are specific to the `contextualai` service.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAIServiceSettings"
+                      }
+                    ]
+                  },
+                  "task_settings": {
+                    "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+                    "allOf": [
+                      {
+                        "$ref": "#/components/schemas/inference._types.ContextualAITaskSettings"
+                      }
+                    ]
+                  }
+                },
+                "required": [
+                  "service",
+                  "service_settings"
+                ]
+              },
+              "examples": {
+                "PutContextualAiRequestExample1": {
+                  "summary": "A rerank task",
+                  "description": "Run `PUT _inference/rerank/contextualai-rerank` to create an inference endpoint for rerank tasks using the Contextual AI service.",
+                  "value": "{\n    \"service\": \"contextualai\",\n    \"service_settings\": {\n        \"api_key\": \"ContextualAI-Api-key\",\n        \"model_id\": \"ctxl-rerank-v2-instruct-multilingual-mini\"\n    },\n    \"task_settings\": {\n        \"instruction\": \"Rerank the following documents based on their relevance to the query.\",\n        \"top_k\": 3\n    }\n}"
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoContextualAi"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Generally available",
+        "x-metaTags": [
+          {
+            "content": "Elasticsearch, Machine Learning",
+            "name": "product_name"
+          }
+        ]
+      }
+    },
     "/_inference/{task_type}/{custom_inference_id}": {
       "put": {
         "tags": [
@@ -65016,7 +65136,7 @@
         "type": "object",
         "properties": {
           "requests_per_minute": {
-            "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
+            "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
             "type": "number"
           }
         }
@@ -65938,6 +66058,89 @@
           "completion"
         ]
       },
+      "inference._types.TaskTypeContextualAI": {
+        "type": "string",
+        "enum": [
+          "rerank"
+        ]
+      },
+      "inference._types.ContextualAIServiceType": {
+        "type": "string",
+        "enum": [
+          "contextualai"
+        ]
+      },
+      "inference._types.ContextualAIServiceSettings": {
+        "type": "object",
+        "properties": {
+          "api_key": {
+            "description": "A valid API key for your Contextual AI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+            "type": "string"
+          },
+          "model_id": {
+            "description": "The name of the model to use for the inference task.\nRefer to the Contextual AI documentation for the list of available rerank models.",
+            "type": "string"
+          },
+          "rate_limit": {
+            "description": "This setting helps to minimize the number of rate limit errors returned from Contextual AI.\nThe `contextualai` service sets a default number of requests allowed per minute depending on the task type.\nFor `rerank`, it is set to `1000`.",
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/inference._types.RateLimitSetting"
+              }
+            ]
+          }
+        },
+        "required": [
+          "api_key",
+          "model_id"
+        ]
+      },
+      "inference._types.ContextualAITaskSettings": {
+        "type": "object",
+        "properties": {
+          "instruction": {
+            "description": "Instructions for the reranking model. Refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-instruction>\nOnly for the `rerank` task type.",
+            "type": "string"
+          },
+          "return_documents": {
+            "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.",
+            "default": false,
+            "type": "boolean"
+          },
+          "top_k": {
+            "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.",
+            "type": "number"
+          }
+        }
+      },
+      "inference._types.InferenceEndpointInfoContextualAi": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/inference._types.InferenceEndpoint"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "inference_id": {
+                "description": "The inference Id",
+                "type": "string"
+              },
+              "task_type": {
+                "description": "The task type",
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/inference._types.TaskTypeContextualAI"
+                  }
+                ]
+              }
+            },
+            "required": [
+              "inference_id",
+              "task_type"
+            ]
+          }
+        ]
+      },
       "inference._types.CustomTaskType": {
         "type": "string",
         "enum": [
diff --git a/output/schema/schema.json b/output/schema/schema.json
index 79e43b84db..49fd2464a2 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -10342,6 +10342,51 @@
         }
       ]
     },
+    {
+      "availability": {
+        "serverless": {
+          "stability": "stable",
+          "visibility": "public"
+        },
+        "stack": {
+          "since": "9.2.0",
+          "stability": "stable",
+          "visibility": "public"
+        }
+      },
+      "description": "Create a Contextual AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `contextualai` service.\n\nTo review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.",
+      "docId": "inference-api-put-contextualai",
+      "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-contextualai",
+      "name": "inference.put_contextualai",
+      "privileges": {
+        "cluster": [
+          "manage_inference"
+        ]
+      },
+      "request": {
+        "name": "Request",
+        "namespace": "inference.put_contextualai"
+      },
+      "requestBodyRequired": false,
+      "requestMediaType": [
+        "application/json"
+      ],
+      "response": {
+        "name": "Response",
+        "namespace": "inference.put_contextualai"
+      },
+      "responseMediaType": [
+        "application/json"
+      ],
+      "urls": [
+        {
+          "methods": [
+            "PUT"
+          ],
+          "path": "/_inference/{task_type}/{contextualai_inference_id}"
+        }
+      ]
+    },
     {
       "availability": {
         "serverless": {
@@ -171224,6 +171269,114 @@
       ],
       "specLocation": "inference/_types/CommonTypes.ts#L123-L135"
     },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "ContextualAIServiceSettings",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "A valid API key for your Contextual AI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+          "extDocId": "contextualai-api-keys",
+          "name": "api_key",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The name of the model to use for the inference task.\nRefer to the Contextual AI documentation for the list of available rerank models.",
+          "extDocId": "contextualai-rerank",
+          "name": "model_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "This setting helps to minimize the number of rate limit errors returned from Contextual AI.\nThe `contextualai` service sets a default number of requests allowed per minute depending on the task type.\nFor `rerank`, it is set to `1000`.",
+          "name": "rate_limit",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RateLimitSetting",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/CommonTypes.ts#L1202-L1225"
+    },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "contextualai"
+        }
+      ],
+      "name": {
+        "name": "ContextualAIServiceType",
+        "namespace": "inference._types"
+      },
+      "specLocation": "inference/_types/CommonTypes.ts#L1198-L1200"
+    },
+    {
+      "kind": "interface",
+      "name": {
+        "name": "ContextualAITaskSettings",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "Instructions for the reranking model. Refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-instruction>\nOnly for the `rerank` task type.",
+          "name": "instruction",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "Whether to return the source documents in the response.\nOnly for the `rerank` task type.",
+          "name": "return_documents",
+          "required": false,
+          "serverDefault": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "boolean",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The number of most relevant documents to return.\nIf not specified, the reranking results of all documents will be returned.\nOnly for the `rerank` task type.",
+          "name": "top_k",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/CommonTypes.ts#L1227-L1245"
+    },
     {
       "kind": "interface",
       "name": {
@@ -171441,7 +171594,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1198-L1220"
+      "specLocation": "inference/_types/CommonTypes.ts#L1247-L1269"
     },
     {
       "kind": "enum",
@@ -171454,7 +171607,7 @@
         "name": "DeepSeekServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1222-L1224"
+      "specLocation": "inference/_types/CommonTypes.ts#L1271-L1273"
     },
     {
       "kind": "interface",
@@ -171595,7 +171748,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1226-L1260"
+      "specLocation": "inference/_types/CommonTypes.ts#L1275-L1309"
     },
     {
       "kind": "enum",
@@ -171608,7 +171761,7 @@
         "name": "ElasticsearchServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1276-L1278"
+      "specLocation": "inference/_types/CommonTypes.ts#L1325-L1327"
     },
     {
       "kind": "interface",
@@ -171631,7 +171784,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1262-L1268"
+      "specLocation": "inference/_types/CommonTypes.ts#L1311-L1317"
     },
     {
       "kind": "enum",
@@ -171650,7 +171803,7 @@
         "name": "ElasticsearchTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1270-L1274"
+      "specLocation": "inference/_types/CommonTypes.ts#L1319-L1323"
     },
     {
       "kind": "interface",
@@ -171696,7 +171849,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1280-L1306"
+      "specLocation": "inference/_types/CommonTypes.ts#L1329-L1355"
     },
     {
       "kind": "enum",
@@ -171709,7 +171862,7 @@
         "name": "ElserServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1312-L1314"
+      "specLocation": "inference/_types/CommonTypes.ts#L1361-L1363"
     },
     {
       "kind": "enum",
@@ -171722,7 +171875,7 @@
         "name": "ElserTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1308-L1310"
+      "specLocation": "inference/_types/CommonTypes.ts#L1357-L1359"
     },
     {
       "kind": "enum",
@@ -171735,7 +171888,7 @@
         "name": "GoogleAiServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1339-L1341"
+      "specLocation": "inference/_types/CommonTypes.ts#L1388-L1390"
     },
     {
       "kind": "interface",
@@ -171783,7 +171936,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1316-L1332"
+      "specLocation": "inference/_types/CommonTypes.ts#L1365-L1381"
     },
     {
       "kind": "enum",
@@ -171799,7 +171952,7 @@
         "name": "GoogleAiStudioTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1334-L1337"
+      "specLocation": "inference/_types/CommonTypes.ts#L1383-L1386"
     },
     {
       "kind": "interface",
@@ -171887,7 +172040,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1343-L1376"
+      "specLocation": "inference/_types/CommonTypes.ts#L1392-L1425"
     },
     {
       "kind": "enum",
@@ -171900,7 +172053,7 @@
         "name": "GoogleVertexAIServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1409-L1411"
+      "specLocation": "inference/_types/CommonTypes.ts#L1458-L1460"
     },
     {
       "kind": "interface",
@@ -171948,7 +172101,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1378-L1393"
+      "specLocation": "inference/_types/CommonTypes.ts#L1427-L1442"
     },
     {
       "kind": "enum",
@@ -171970,7 +172123,7 @@
         "name": "GoogleVertexAITaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1402-L1407"
+      "specLocation": "inference/_types/CommonTypes.ts#L1451-L1456"
     },
     {
       "kind": "interface",
@@ -172032,7 +172185,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1413-L1445"
+      "specLocation": "inference/_types/CommonTypes.ts#L1462-L1494"
     },
     {
       "kind": "enum",
@@ -172045,7 +172198,7 @@
         "name": "HuggingFaceServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1466-L1468"
+      "specLocation": "inference/_types/CommonTypes.ts#L1515-L1517"
     },
     {
       "kind": "interface",
@@ -172079,7 +172232,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1447-L1457"
+      "specLocation": "inference/_types/CommonTypes.ts#L1496-L1506"
     },
     {
       "kind": "enum",
@@ -172101,7 +172254,7 @@
         "name": "HuggingFaceTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1459-L1464"
+      "specLocation": "inference/_types/CommonTypes.ts#L1508-L1513"
     },
     {
       "kind": "interface",
@@ -172193,7 +172346,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L313-L372"
+      "specLocation": "inference/_types/Services.ts#L325-L384"
     },
     {
       "kind": "interface",
@@ -172252,7 +172405,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L47-L67"
+      "specLocation": "inference/_types/Services.ts#L48-L68"
     },
     {
       "kind": "interface",
@@ -172293,7 +172446,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L69-L81"
+      "specLocation": "inference/_types/Services.ts#L70-L82"
     },
     {
       "kind": "interface",
@@ -172333,7 +172486,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L83-L92"
+      "specLocation": "inference/_types/Services.ts#L84-L93"
     },
     {
       "kind": "interface",
@@ -172373,7 +172526,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L94-L103"
+      "specLocation": "inference/_types/Services.ts#L95-L104"
     },
     {
       "kind": "interface",
@@ -172413,7 +172566,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L105-L114"
+      "specLocation": "inference/_types/Services.ts#L106-L115"
     },
     {
       "kind": "interface",
@@ -172453,7 +172606,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L116-L125"
+      "specLocation": "inference/_types/Services.ts#L117-L126"
     },
     {
       "kind": "interface",
@@ -172493,7 +172646,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L127-L136"
+      "specLocation": "inference/_types/Services.ts#L128-L137"
     },
     {
       "kind": "interface",
@@ -172533,7 +172686,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L138-L147"
+      "specLocation": "inference/_types/Services.ts#L139-L148"
     },
     {
       "kind": "interface",
@@ -172573,7 +172726,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L149-L158"
+      "specLocation": "inference/_types/Services.ts#L150-L159"
     },
     {
       "kind": "interface",
@@ -172613,7 +172766,47 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L160-L169"
+      "specLocation": "inference/_types/Services.ts#L161-L170"
+    },
+    {
+      "kind": "interface",
+      "inherits": {
+        "type": {
+          "name": "InferenceEndpoint",
+          "namespace": "inference._types"
+        }
+      },
+      "name": {
+        "name": "InferenceEndpointInfoContextualAi",
+        "namespace": "inference._types"
+      },
+      "properties": [
+        {
+          "description": "The inference Id",
+          "name": "inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The task type",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TaskTypeContextualAI",
+              "namespace": "inference._types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/_types/Services.ts#L172-L181"
     },
     {
       "kind": "interface",
@@ -172653,7 +172846,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L171-L180"
+      "specLocation": "inference/_types/Services.ts#L183-L192"
     },
     {
       "kind": "interface",
@@ -172693,7 +172886,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L181-L190"
+      "specLocation": "inference/_types/Services.ts#L193-L202"
     },
     {
       "kind": "interface",
@@ -172733,7 +172926,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L203-L212"
+      "specLocation": "inference/_types/Services.ts#L215-L224"
     },
     {
       "kind": "interface",
@@ -172773,7 +172966,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L192-L201"
+      "specLocation": "inference/_types/Services.ts#L204-L213"
     },
     {
       "kind": "interface",
@@ -172813,7 +173006,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L214-L223"
+      "specLocation": "inference/_types/Services.ts#L226-L235"
     },
     {
       "kind": "interface",
@@ -172853,7 +173046,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L225-L234"
+      "specLocation": "inference/_types/Services.ts#L237-L246"
     },
     {
       "kind": "interface",
@@ -172893,7 +173086,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L236-L245"
+      "specLocation": "inference/_types/Services.ts#L248-L257"
     },
     {
       "kind": "interface",
@@ -172933,7 +173126,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L247-L256"
+      "specLocation": "inference/_types/Services.ts#L259-L268"
     },
     {
       "kind": "interface",
@@ -172973,7 +173166,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L258-L267"
+      "specLocation": "inference/_types/Services.ts#L270-L279"
     },
     {
       "kind": "interface",
@@ -173013,7 +173206,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L269-L278"
+      "specLocation": "inference/_types/Services.ts#L281-L290"
     },
     {
       "kind": "interface",
@@ -173053,7 +173246,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L280-L289"
+      "specLocation": "inference/_types/Services.ts#L292-L301"
     },
     {
       "kind": "interface",
@@ -173093,7 +173286,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L291-L300"
+      "specLocation": "inference/_types/Services.ts#L303-L312"
     },
     {
       "kind": "interface",
@@ -173133,7 +173326,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L302-L311"
+      "specLocation": "inference/_types/Services.ts#L314-L323"
     },
     {
       "kind": "interface",
@@ -173293,7 +173486,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1470-L1499"
+      "specLocation": "inference/_types/CommonTypes.ts#L1519-L1548"
     },
     {
       "kind": "enum",
@@ -173306,7 +173499,7 @@
         "name": "JinaAIServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1529-L1531"
+      "specLocation": "inference/_types/CommonTypes.ts#L1578-L1580"
     },
     {
       "kind": "enum",
@@ -173325,7 +173518,7 @@
         "name": "JinaAISimilarityType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1533-L1537"
+      "specLocation": "inference/_types/CommonTypes.ts#L1582-L1586"
     },
     {
       "kind": "interface",
@@ -173371,7 +173564,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1501-L1522"
+      "specLocation": "inference/_types/CommonTypes.ts#L1550-L1571"
     },
     {
       "kind": "enum",
@@ -173387,7 +173580,7 @@
         "name": "JinaAITaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1524-L1527"
+      "specLocation": "inference/_types/CommonTypes.ts#L1573-L1576"
     },
     {
       "kind": "enum",
@@ -173409,7 +173602,7 @@
         "name": "JinaAITextEmbeddingTask",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1539-L1544"
+      "specLocation": "inference/_types/CommonTypes.ts#L1588-L1593"
     },
     {
       "kind": "interface",
@@ -173481,7 +173674,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1546-L1576"
+      "specLocation": "inference/_types/CommonTypes.ts#L1595-L1625"
     },
     {
       "kind": "enum",
@@ -173494,7 +173687,7 @@
         "name": "LlamaServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1584-L1586"
+      "specLocation": "inference/_types/CommonTypes.ts#L1633-L1635"
     },
     {
       "kind": "enum",
@@ -173513,7 +173706,7 @@
         "name": "LlamaSimilarityType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1588-L1592"
+      "specLocation": "inference/_types/CommonTypes.ts#L1637-L1641"
     },
     {
       "kind": "enum",
@@ -173532,7 +173725,7 @@
         "name": "LlamaTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1578-L1582"
+      "specLocation": "inference/_types/CommonTypes.ts#L1627-L1631"
     },
     {
       "kind": "interface",
@@ -173690,7 +173883,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1594-L1621"
+      "specLocation": "inference/_types/CommonTypes.ts#L1643-L1670"
     },
     {
       "kind": "enum",
@@ -173703,7 +173896,7 @@
         "name": "MistralServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1629-L1631"
+      "specLocation": "inference/_types/CommonTypes.ts#L1678-L1680"
     },
     {
       "kind": "enum",
@@ -173722,7 +173915,7 @@
         "name": "MistralTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1623-L1627"
+      "specLocation": "inference/_types/CommonTypes.ts#L1672-L1676"
     },
     {
       "kind": "interface",
@@ -173809,7 +174002,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1633-L1675"
+      "specLocation": "inference/_types/CommonTypes.ts#L1682-L1724"
     },
     {
       "kind": "enum",
@@ -173822,7 +174015,7 @@
         "name": "OpenAIServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1691-L1693"
+      "specLocation": "inference/_types/CommonTypes.ts#L1740-L1742"
     },
     {
       "kind": "interface",
@@ -173844,7 +174037,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1677-L1683"
+      "specLocation": "inference/_types/CommonTypes.ts#L1726-L1732"
     },
     {
       "kind": "enum",
@@ -173863,7 +174056,7 @@
         "name": "OpenAITaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1685-L1689"
+      "specLocation": "inference/_types/CommonTypes.ts#L1734-L1738"
     },
     {
       "kind": "interface",
@@ -173918,7 +174111,7 @@
       },
       "properties": [
         {
-          "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
+          "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`",
           "name": "requests_per_minute",
           "required": false,
           "type": {
@@ -173930,7 +174123,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/Services.ts#L378-L405"
+      "specLocation": "inference/_types/Services.ts#L390-L418"
     },
     {
       "kind": "interface",
@@ -174078,7 +174271,7 @@
         "name": "ServiceSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L374-L374",
+      "specLocation": "inference/_types/Services.ts#L386-L386",
       "type": {
         "kind": "user_defined_value"
       }
@@ -174162,7 +174355,7 @@
         "name": "TaskSettings",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/Services.ts#L376-L376",
+      "specLocation": "inference/_types/Services.ts#L388-L388",
       "type": {
         "kind": "user_defined_value"
       }
@@ -174338,6 +174531,19 @@
       },
       "specLocation": "inference/_types/TaskType.ts#L76-L80"
     },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "rerank"
+        }
+      ],
+      "name": {
+        "name": "TaskTypeContextualAI",
+        "namespace": "inference._types"
+      },
+      "specLocation": "inference/_types/TaskType.ts#L82-L84"
+    },
     {
       "kind": "enum",
       "members": [
@@ -174358,7 +174564,7 @@
         "name": "TaskTypeCustom",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L82-L87"
+      "specLocation": "inference/_types/TaskType.ts#L86-L91"
     },
     {
       "kind": "enum",
@@ -174374,7 +174580,7 @@
         "name": "TaskTypeDeepSeek",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L89-L92"
+      "specLocation": "inference/_types/TaskType.ts#L93-L96"
     },
     {
       "kind": "enum",
@@ -174387,7 +174593,7 @@
         "name": "TaskTypeELSER",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L100-L102"
+      "specLocation": "inference/_types/TaskType.ts#L104-L106"
     },
     {
       "kind": "enum",
@@ -174406,7 +174612,7 @@
         "name": "TaskTypeElasticsearch",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L94-L98"
+      "specLocation": "inference/_types/TaskType.ts#L98-L102"
     },
     {
       "kind": "enum",
@@ -174422,7 +174628,7 @@
         "name": "TaskTypeGoogleAIStudio",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L104-L107"
+      "specLocation": "inference/_types/TaskType.ts#L108-L111"
     },
     {
       "kind": "enum",
@@ -174438,7 +174644,7 @@
         "name": "TaskTypeGoogleVertexAI",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L109-L112"
+      "specLocation": "inference/_types/TaskType.ts#L113-L116"
     },
     {
       "kind": "enum",
@@ -174460,7 +174666,7 @@
         "name": "TaskTypeHuggingFace",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L114-L119"
+      "specLocation": "inference/_types/TaskType.ts#L118-L123"
     },
     {
       "kind": "enum",
@@ -174495,7 +174701,7 @@
         "name": "TaskTypeLlama",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L121-L125"
+      "specLocation": "inference/_types/TaskType.ts#L125-L129"
     },
     {
       "kind": "enum",
@@ -174514,7 +174720,7 @@
         "name": "TaskTypeMistral",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L127-L131"
+      "specLocation": "inference/_types/TaskType.ts#L131-L135"
     },
     {
       "kind": "enum",
@@ -174533,7 +174739,7 @@
         "name": "TaskTypeOpenAI",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L133-L137"
+      "specLocation": "inference/_types/TaskType.ts#L137-L141"
     },
     {
       "kind": "enum",
@@ -174549,7 +174755,7 @@
         "name": "TaskTypeVoyageAI",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L139-L142"
+      "specLocation": "inference/_types/TaskType.ts#L143-L146"
     },
     {
       "kind": "enum",
@@ -174568,7 +174774,7 @@
         "name": "TaskTypeWatsonx",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/TaskType.ts#L144-L148"
+      "specLocation": "inference/_types/TaskType.ts#L148-L152"
     },
     {
       "kind": "interface",
@@ -174690,7 +174896,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1395-L1400"
+      "specLocation": "inference/_types/CommonTypes.ts#L1444-L1449"
     },
     {
       "kind": "interface",
@@ -174836,7 +175042,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1695-L1726"
+      "specLocation": "inference/_types/CommonTypes.ts#L1744-L1775"
     },
     {
       "kind": "enum",
@@ -174849,7 +175055,7 @@
         "name": "VoyageAIServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1759-L1761"
+      "specLocation": "inference/_types/CommonTypes.ts#L1808-L1810"
     },
     {
       "kind": "interface",
@@ -174909,7 +175115,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1728-L1752"
+      "specLocation": "inference/_types/CommonTypes.ts#L1777-L1801"
     },
     {
       "kind": "enum",
@@ -174925,7 +175131,7 @@
         "name": "VoyageAITaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1754-L1757"
+      "specLocation": "inference/_types/CommonTypes.ts#L1803-L1806"
     },
     {
       "kind": "interface",
@@ -175013,7 +175219,7 @@
           }
         }
       ],
-      "specLocation": "inference/_types/CommonTypes.ts#L1763-L1801"
+      "specLocation": "inference/_types/CommonTypes.ts#L1812-L1850"
     },
     {
       "kind": "enum",
@@ -175026,7 +175232,7 @@
         "name": "WatsonxServiceType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1809-L1811"
+      "specLocation": "inference/_types/CommonTypes.ts#L1858-L1860"
     },
     {
       "kind": "enum",
@@ -175045,7 +175251,7 @@
         "name": "WatsonxTaskType",
         "namespace": "inference._types"
       },
-      "specLocation": "inference/_types/CommonTypes.ts#L1803-L1807"
+      "specLocation": "inference/_types/CommonTypes.ts#L1852-L1856"
     },
     {
       "kind": "request",
@@ -177517,6 +177723,147 @@
       },
       "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L25"
     },
+    {
+      "kind": "request",
+      "attachedBehaviors": [
+        "CommonQueryParameters"
+      ],
+      "body": {
+        "kind": "properties",
+        "properties": [
+          {
+            "description": "The chunking configuration object.",
+            "extDocId": "inference-chunking",
+            "extDocUrl": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config",
+            "name": "chunking_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "InferenceChunkingSettings",
+                "namespace": "inference._types"
+              }
+            }
+          },
+          {
+            "description": "The type of service supported for the specified task type. In this case, `contextualai`.",
+            "name": "service",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ContextualAIServiceType",
+                "namespace": "inference._types"
+              }
+            }
+          },
+          {
+            "description": "Settings used to install the inference model. These settings are specific to the `contextualai` service.",
+            "name": "service_settings",
+            "required": true,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ContextualAIServiceSettings",
+                "namespace": "inference._types"
+              }
+            }
+          },
+          {
+            "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+            "name": "task_settings",
+            "required": false,
+            "type": {
+              "kind": "instance_of",
+              "type": {
+                "name": "ContextualAITaskSettings",
+                "namespace": "inference._types"
+              }
+            }
+          }
+        ]
+      },
+      "description": "Create a Contextual AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `contextualai` service.\n\nTo review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.",
+      "examples": {
+        "PutContextualAiRequestExample1": {
+          "description": "Run `PUT _inference/rerank/contextualai-rerank` to create an inference endpoint for rerank tasks using the Contextual AI service.",
+          "method_request": "PUT _inference/rerank/contextualai-rerank",
+          "summary": "A rerank task",
+          "value": "{\n    \"service\": \"contextualai\",\n    \"service_settings\": {\n        \"api_key\": \"ContextualAI-Api-key\",\n        \"model_id\": \"ctxl-rerank-v2-instruct-multilingual-mini\"\n    },\n    \"task_settings\": {\n        \"instruction\": \"Rerank the following documents based on their relevance to the query.\",\n        \"top_k\": 3\n    }\n}"
+        }
+      },
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "name": {
+        "name": "Request",
+        "namespace": "inference.put_contextualai"
+      },
+      "path": [
+        {
+          "description": "The type of the inference task that the model will perform.",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TaskTypeContextualAI",
+              "namespace": "inference._types"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier of the inference endpoint.",
+          "name": "contextualai_inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [
+        {
+          "description": "Specifies the amount of time to wait for the inference endpoint to be created.",
+          "name": "timeout",
+          "required": false,
+          "serverDefault": "30s",
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Duration",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "specLocation": "inference/put_contextualai/PutContextualAiRequest.ts#L31-L87"
+    },
+    {
+      "kind": "response",
+      "body": {
+        "kind": "value",
+        "codegenName": "endpoint_info",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceEndpointInfoContextualAi",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "name": {
+        "name": "Response",
+        "namespace": "inference.put_contextualai"
+      },
+      "specLocation": "inference/put_contextualai/PutContextualAiResponse.ts#L22-L25"
+    },
     {
       "kind": "request",
       "attachedBehaviors": [
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 372f744ce0..4e691b87d7 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -14017,6 +14017,20 @@ export interface InferenceContentObject {
   type: string
 }
 
+export interface InferenceContextualAIServiceSettings {
+  api_key: string
+  model_id: string
+  rate_limit?: InferenceRateLimitSetting
+}
+
+export type InferenceContextualAIServiceType = 'contextualai'
+
+export interface InferenceContextualAITaskSettings {
+  instruction?: string
+  return_documents?: boolean
+  top_k?: integer
+}
+
 export interface InferenceCustomRequestParams {
   content: string
 }
@@ -14191,6 +14205,11 @@ export interface InferenceInferenceEndpointInfoCohere extends InferenceInference
   task_type: InferenceTaskTypeCohere
 }
 
+export interface InferenceInferenceEndpointInfoContextualAi extends InferenceInferenceEndpoint {
+  inference_id: string
+  task_type: InferenceTaskTypeContextualAI
+}
+
 export interface InferenceInferenceEndpointInfoCustom extends InferenceInferenceEndpoint {
   inference_id: string
   task_type: InferenceTaskTypeCustom
@@ -14394,6 +14413,8 @@ export type InferenceTaskTypeAzureOpenAI = 'text_embedding' | 'completion'
 
 export type InferenceTaskTypeCohere = 'text_embedding' | 'rerank' | 'completion'
 
+export type InferenceTaskTypeContextualAI = 'rerank'
+
 export type InferenceTaskTypeCustom = 'text_embedding' | 'sparse_embedding' | 'rerank' | 'completion'
 
 export type InferenceTaskTypeDeepSeek = 'completion' | 'chat_completion'
@@ -14650,6 +14671,20 @@ export interface InferencePutCohereRequest extends RequestBase {
 
 export type InferencePutCohereResponse = InferenceInferenceEndpointInfoCohere
 
+export interface InferencePutContextualaiRequest extends RequestBase {
+  task_type: InferenceTaskTypeContextualAI
+  contextualai_inference_id: Id
+  timeout?: Duration
+  body?: {
+    chunking_settings?: InferenceInferenceChunkingSettings
+    service: InferenceContextualAIServiceType
+    service_settings: InferenceContextualAIServiceSettings
+    task_settings?: InferenceContextualAITaskSettings
+  }
+}
+
+export type InferencePutContextualaiResponse = InferenceInferenceEndpointInfoContextualAi
+
 export interface InferencePutCustomRequest extends RequestBase {
   task_type: InferenceCustomTaskType
   custom_inference_id: Id
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index 34238c58e8..23941a5133 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -373,6 +373,7 @@ inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/op
 inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html,
 inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html,
 inference-api-put-cohere,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html,
+inference-api-put-contextualai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-contextualai,,
 inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-custom.html,
 inference-api-put-deepseek,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-deepseek.html,
 inference-api-put-eis,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis,,
diff --git a/specification/_json_spec/inference.put_contextualai.json b/specification/_json_spec/inference.put_contextualai.json
new file mode 100644
index 0000000000..983ce42738
--- /dev/null
+++ b/specification/_json_spec/inference.put_contextualai.json
@@ -0,0 +1,35 @@
+{
+  "inference.put_contextualai": {
+    "documentation": {
+      "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-contextualai",
+      "description": "Create a Contextual AI inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{contextualai_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "contextualai_inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings"
+    }
+  }
+}
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
index 7c05f05b65..25d1be939a 100644
--- a/specification/inference/_types/CommonTypes.ts
+++ b/specification/inference/_types/CommonTypes.ts
@@ -1195,6 +1195,55 @@ export class CustomTaskSettings {
   parameters?: UserDefinedValue
 }
 
+export enum ContextualAIServiceType {
+  contextualai
+}
+
+export class ContextualAIServiceSettings {
+  /**
+   * A valid API key for your Contextual AI account.
+   *
+   * IMPORTANT: You need to provide the API key only once, during the inference model creation.
+   * The get inference endpoint API does not retrieve your API key.
+   * After creating the inference model, you cannot change the associated API key.
+   * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
+   * @ext_doc_id contextualai-api-keys
+   */
+  api_key: string
+  /**
+   * The name of the model to use for the inference task.
+   * Refer to the Contextual AI documentation for the list of available rerank models.
+   * @ext_doc_id contextualai-rerank
+   */
+  model_id: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from Contextual AI.
+   * The `contextualai` service sets a default number of requests allowed per minute depending on the task type.
+   * For `rerank`, it is set to `1000`.
+   */
+  rate_limit?: RateLimitSetting
+}
+
+export class ContextualAITaskSettings {
+  /**
+   * Instructions for the reranking model. Refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-instruction>.
+   * Only for the `rerank` task type.
+   */
+  instruction?: string
+  /**
+   * Whether to return the source documents in the response.
+   * Only for the `rerank` task type.
+   * @server_default false
+   */
+  return_documents?: boolean
+  /**
+   * The number of most relevant documents to return.
+   * If not specified, the reranking results of all documents will be returned.
+   * Only for the `rerank` task type.
+   */
+  top_k?: integer
+}
+
 export class DeepSeekServiceSettings {
   /**
    * A valid API key for your DeepSeek account.
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
index 1af04c1928..788604d1d1 100644
--- a/specification/inference/_types/Services.ts
+++ b/specification/inference/_types/Services.ts
@@ -29,6 +29,7 @@ import {
   TaskTypeAzureAIStudio,
   TaskTypeAzureOpenAI,
   TaskTypeCohere,
+  TaskTypeContextualAI,
   TaskTypeCustom,
   TaskTypeDeepSeek,
   TaskTypeElasticsearch,
@@ -168,6 +169,17 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint {
   task_type: TaskTypeCohere
 }
 
+export class InferenceEndpointInfoContextualAi extends InferenceEndpoint {
+  /**
+   * The inference Id
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeContextualAI
+}
+
 export class InferenceEndpointInfoCustom extends InferenceEndpoint {
   /**
    * The inference Id
@@ -389,6 +401,7 @@ export class RateLimitSetting {
    * * `azureopenai` service and task type `text_embedding`: `1440`
    * * `azureopenai` service and task type `completion`: `120`
    * * `cohere` service: `10000`
+   * * `contextualai` service: `1000`
    * * `elastic` service and task type `chat_completion`: `240`
    * * `googleaistudio` service: `360`
    * * `googlevertexai` service: `30000`
diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
index 5e76973a74..e0e5882eb3 100644
--- a/specification/inference/_types/TaskType.ts
+++ b/specification/inference/_types/TaskType.ts
@@ -79,6 +79,10 @@ export enum TaskTypeCohere {
   completion
 }
 
+export enum TaskTypeContextualAI {
+  rerank
+}
+
 export enum TaskTypeCustom {
   text_embedding,
   sparse_embedding,
diff --git a/specification/inference/put_contextualai/PutContextualAiRequest.ts b/specification/inference/put_contextualai/PutContextualAiRequest.ts
new file mode 100644
index 0000000000..fefd3fb051
--- /dev/null
+++ b/specification/inference/put_contextualai/PutContextualAiRequest.ts
@@ -0,0 +1,87 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+import {
+  ContextualAIServiceSettings,
+  ContextualAIServiceType,
+  ContextualAITaskSettings
+} from '@inference/_types/CommonTypes'
+import { InferenceChunkingSettings } from '@inference/_types/Services'
+import { TaskTypeContextualAI } from '@inference/_types/TaskType'
+
+/**
+ * Create a Contextual AI inference endpoint.
+ *
+ * Create an inference endpoint to perform an inference task with the `contextualai` service.
+ *
+ * To review the available `rerank` models, refer to <https://docs.contextual.ai/api-reference/rerank/rerank#body-model>.
+ * @rest_spec_name inference.put_contextualai
+ * @availability stack since=9.2.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-contextualai
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{contextualai_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     */
+    task_type: TaskTypeContextualAI
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    contextualai_inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `contextualai`.
+     */
+    service: ContextualAIServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `contextualai` service.
+     */
+    service_settings: ContextualAIServiceSettings
+    /**
+     * Settings to configure the inference task.
+     * These settings are specific to the task type you specified.
+     */
+    task_settings?: ContextualAITaskSettings
+  }
+}
diff --git a/specification/inference/put_contextualai/PutContextualAiResponse.ts b/specification/inference/put_contextualai/PutContextualAiResponse.ts
new file mode 100644
index 0000000000..b816091333
--- /dev/null
+++ b/specification/inference/put_contextualai/PutContextualAiResponse.ts
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfoContextualAi } from '@inference/_types/Services'
+
+export class Response {
+  /** @codegen_name endpoint_info */
+  body: InferenceEndpointInfoContextualAi
+}
diff --git a/specification/inference/put_contextualai/examples/request/PutContextualAiRequestExample1.yaml b/specification/inference/put_contextualai/examples/request/PutContextualAiRequestExample1.yaml
new file mode 100644
index 0000000000..28c8825ea2
--- /dev/null
+++ b/specification/inference/put_contextualai/examples/request/PutContextualAiRequestExample1.yaml
@@ -0,0 +1,16 @@
+summary: A rerank task
+description: Run `PUT _inference/rerank/contextualai-rerank` to create an inference endpoint for rerank tasks using the Contextual AI service.
+method_request: 'PUT _inference/rerank/contextualai-rerank'
+# type: "request"
+value: |-
+  {
+      "service": "contextualai",
+      "service_settings": {
+          "api_key": "ContextualAI-Api-key",
+          "model_id": "ctxl-rerank-v2-instruct-multilingual-mini"
+      },
+      "task_settings": {
+          "instruction": "Rerank the following documents based on their relevance to the query.",
+          "top_k": 3
+      }
+  }