diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 63df904d1b..44629ed72b 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17610,7 +17610,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: 
`manage_inference`", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put", "parameters": [ { @@ -17704,7 +17704,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, 
E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, 
if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-1", "parameters": [ { @@ -18687,7 +18687,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, `chat_completion`, and `rerank`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n\nFor Elastic's `rerank` task:\nThe selected model must support the `sentence-ranking` task and expose OpenAI API.\nHuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far.\nAfter the endpoint is initialized, copy the full endpoint URL for use.\nTested models for `rerank` task:\n\n* `bge-reranker-base`\n* `jina-reranker-v1-turbo-en-GGUF`\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -18727,6 +18727,9 @@ }, "service_settings": { "$ref": "#/components/schemas/inference._types.HuggingFaceServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types.HuggingFaceTaskSettings" } }, "required": [ @@ -18739,6 +18742,11 @@ "summary": "A text embedding task", "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "value": 
"{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + }, + "PutHuggingFaceRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } } } @@ -19237,6 +19245,16 @@ "summary": "Rerank task", "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" + }, + "RerankRequestExample2": { + "summary": "Rerank task", + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" + }, + "RerankRequestExample3": { + "summary": "Rerank task", + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } } } @@ -19255,6 +19273,16 @@ "summary": "Rerank task", "description": "A successful response from `POST _inference/rerank/cohere_rerank`.\n", "value": "{\n \"rerank\": [\n {\n \"index\": \"2\",\n \"relevance_score\": \"0.011597361\",\n \"text\": \"leia\"\n },\n {\n \"index\": \"0\",\n \"relevance_score\": \"0.006338922\",\n \"text\": 
\"luke\"\n },\n {\n \"index\": \"5\",\n \"relevance_score\": \"0.0016166499\",\n \"text\": \"star\"\n },\n {\n \"index\": \"4\",\n \"relevance_score\": \"0.0011695103\",\n \"text\": \"r2d2\"\n },\n {\n \"index\": \"1\",\n \"relevance_score\": \"5.614787E-4\",\n \"text\": \"like\"\n },\n {\n \"index\": \"6\",\n \"relevance_score\": \"3.7850367E-4\",\n \"text\": \"wars\"\n },\n {\n \"index\": \"3\",\n \"relevance_score\": \"1.2508839E-5\",\n \"text\": \"chewy\"\n }\n ]\n}" + }, + "RerankResponseExample2": { + "summary": "Rerank task", + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794\n }\n ]\n}" + }, + "RerankResponseExample3": { + "summary": "Rerank task", + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844,\n \"text\": \"wars\"\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794,\n \"text\": \"star\"\n },\n {\n \"index\": 3,\n \"relevance_score\": 0.004520818,\n \"text\": \"chewy\"\n }\n ]\n}" } } } @@ -79913,6 +79941,7 @@ "enum": [ "chat_completion", "completion", + "rerank", "text_embedding" ] }, @@ -79952,6 +79981,19 @@ "url" ] }, + "inference._types.HuggingFaceTaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return doc text within the results.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.", + "type": "number" + } + } + }, "inference._types.JinaAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 29aedcbb43..dbcb2526f4 100644 --- 
a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9667,7 +9667,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in 
machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put", "parameters": [ { @@ -9761,7 +9761,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, 
Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine 
learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-1", "parameters": [ { @@ -10744,7 +10744,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n ##Required authorization\n* Cluster privileges: `manage_inference`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, `chat_completion`, and `rerank`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n\nFor Elastic's `rerank` task:\nThe selected model must support the `sentence-ranking` task and expose OpenAI API.\nHuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far.\nAfter the endpoint is initialized, copy the full endpoint URL for use.\nTested models for `rerank` task:\n\n* `bge-reranker-base`\n* `jina-reranker-v1-turbo-en-GGUF`\n ##Required authorization\n* Cluster privileges: `manage_inference`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -10784,6 +10784,9 @@ }, "service_settings": { "$ref": "#/components/schemas/inference._types.HuggingFaceServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types.HuggingFaceTaskSettings" } }, "required": [ @@ -10796,6 +10799,11 @@ "summary": "A text embedding task", "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "value": 
"{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + }, + "PutHuggingFaceRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } } } @@ -11294,6 +11302,16 @@ "summary": "Rerank task", "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" + }, + "RerankRequestExample2": { + "summary": "Rerank task", + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" + }, + "RerankRequestExample3": { + "summary": "Rerank task", + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } } } @@ -11312,6 +11330,16 @@ "summary": "Rerank task", "description": "A successful response from `POST _inference/rerank/cohere_rerank`.\n", "value": "{\n \"rerank\": [\n {\n \"index\": \"2\",\n \"relevance_score\": \"0.011597361\",\n \"text\": \"leia\"\n },\n {\n \"index\": \"0\",\n \"relevance_score\": \"0.006338922\",\n \"text\": 
\"luke\"\n },\n {\n \"index\": \"5\",\n \"relevance_score\": \"0.0016166499\",\n \"text\": \"star\"\n },\n {\n \"index\": \"4\",\n \"relevance_score\": \"0.0011695103\",\n \"text\": \"r2d2\"\n },\n {\n \"index\": \"1\",\n \"relevance_score\": \"5.614787E-4\",\n \"text\": \"like\"\n },\n {\n \"index\": \"6\",\n \"relevance_score\": \"3.7850367E-4\",\n \"text\": \"wars\"\n },\n {\n \"index\": \"3\",\n \"relevance_score\": \"1.2508839E-5\",\n \"text\": \"chewy\"\n }\n ]\n}" + }, + "RerankResponseExample2": { + "summary": "Rerank task", + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794\n }\n ]\n}" + }, + "RerankResponseExample3": { + "summary": "Rerank task", + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844,\n \"text\": \"wars\"\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794,\n \"text\": \"star\"\n },\n {\n \"index\": 3,\n \"relevance_score\": 0.004520818,\n \"text\": \"chewy\"\n }\n ]\n}" } } } @@ -51143,6 +51171,7 @@ "enum": [ "chat_completion", "completion", + "rerank", "text_embedding" ] }, @@ -51182,6 +51211,19 @@ "url" ] }, + "inference._types.HuggingFaceTaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return doc text within the results.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.", + "type": "number" + } + } + }, "inference._types.JinaAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema.json b/output/schema/schema.json index 479e42051b..3bec95c7a4 100644 --- a/output/schema/schema.json +++ 
b/output/schema/schema.json @@ -9415,7 +9415,7 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI 
Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", "docId": "inference-api-put", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", "name": "inference.put", @@ -9920,7 +9920,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that 
supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, `chat_completion`, and `rerank`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n\nFor Elastic's `rerank` task:\nThe selected model must support the `sentence-ranking` task and expose OpenAI API.\nHuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far.\nAfter the endpoint is initialized, copy the full endpoint URL for use.\nTested models for `rerank` task:\n\n* `bge-reranker-base`\n* `jina-reranker-v1-turbo-en-GGUF`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-hugging-face.html", "name": "inference.put_hugging_face", @@ -154201,7 +154201,41 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L988-L990" + "specLocation": "inference/_types/CommonTypes.ts#L1001-L1003" + }, + { + "kind": "interface", + "name": { + "name": "HuggingFaceTaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `rerank` task, return 
doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L982-L992" }, { "kind": "enum", @@ -154212,6 +154246,9 @@ { "name": "completion" }, + { + "name": "rerank" + }, { "name": "text_embedding" } @@ -154220,7 +154257,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L982-L986" + "specLocation": "inference/_types/CommonTypes.ts#L994-L999" }, { "kind": "interface", @@ -154623,7 +154660,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L992-L1021" + "specLocation": "inference/_types/CommonTypes.ts#L1005-L1034" }, { "kind": "enum", @@ -154636,7 +154673,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1051-L1053" + "specLocation": "inference/_types/CommonTypes.ts#L1064-L1066" }, { "kind": "enum", @@ -154655,7 +154692,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1055-L1059" + "specLocation": "inference/_types/CommonTypes.ts#L1068-L1072" }, { "kind": "interface", @@ -154701,7 +154738,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1023-L1044" + "specLocation": "inference/_types/CommonTypes.ts#L1036-L1057" }, { "kind": "enum", @@ -154717,7 +154754,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1046-L1049" + "specLocation": "inference/_types/CommonTypes.ts#L1059-L1062" }, { "kind": 
"enum", @@ -154739,7 +154776,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1061-L1066" + "specLocation": "inference/_types/CommonTypes.ts#L1074-L1079" }, { "kind": "interface", @@ -154897,7 +154934,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1068-L1095" + "specLocation": "inference/_types/CommonTypes.ts#L1081-L1108" }, { "kind": "enum", @@ -154910,7 +154947,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1103-L1105" + "specLocation": "inference/_types/CommonTypes.ts#L1116-L1118" }, { "kind": "enum", @@ -154929,7 +154966,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1097-L1101" + "specLocation": "inference/_types/CommonTypes.ts#L1110-L1114" }, { "kind": "interface", @@ -155016,7 +155053,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1107-L1149" + "specLocation": "inference/_types/CommonTypes.ts#L1120-L1162" }, { "kind": "enum", @@ -155029,7 +155066,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1165-L1167" + "specLocation": "inference/_types/CommonTypes.ts#L1178-L1180" }, { "kind": "interface", @@ -155051,7 +155088,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1151-L1157" + "specLocation": "inference/_types/CommonTypes.ts#L1164-L1170" }, { "kind": "enum", @@ -155070,7 +155107,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1159-L1163" + "specLocation": "inference/_types/CommonTypes.ts#L1172-L1176" }, { "kind": "interface", @@ -155680,7 +155717,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1169-L1200" + "specLocation": "inference/_types/CommonTypes.ts#L1182-L1213" }, { "kind": "enum", @@ -155693,7 +155730,7 @@ "name": 
"VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1233-L1235" + "specLocation": "inference/_types/CommonTypes.ts#L1246-L1248" }, { "kind": "interface", @@ -155753,7 +155790,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1202-L1226" + "specLocation": "inference/_types/CommonTypes.ts#L1215-L1239" }, { "kind": "enum", @@ -155769,7 +155806,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1228-L1231" + "specLocation": "inference/_types/CommonTypes.ts#L1241-L1244" }, { "kind": "interface", @@ -155857,7 +155894,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1237-L1274" + "specLocation": "inference/_types/CommonTypes.ts#L1250-L1287" }, { "kind": "enum", @@ -155870,7 +155907,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1280-L1282" + "specLocation": "inference/_types/CommonTypes.ts#L1293-L1295" }, { "kind": "enum", @@ -155883,7 +155920,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1276-L1278" + "specLocation": "inference/_types/CommonTypes.ts#L1289-L1291" }, { "kind": "request", @@ -156424,7 +156461,7 @@ } } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the 
inference API. You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`text_embedding`, `rerank`)\n* Watsonx inference integration (`text_embedding`)\n* JinaAI (`text_embedding`, `rerank`)", "inherits": { "type": { "name": "RequestBase", @@ -157838,15 +157875,32 @@ "namespace": "inference._types" } } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskSettings", + "namespace": "inference._types" + } + } } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, `chat_completion`, and `rerank`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. 
On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`\n\nFor Elastic's `rerank` task:\nThe selected model must support the `sentence-ranking` task and expose OpenAI API.\nHuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far.\nAfter the endpoint is initialized, copy the full endpoint URL for use.\nTested models for `rerank` task:\n\n* `bge-reranker-base`\n* `jina-reranker-v1-turbo-en-GGUF`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "summary": "A text embedding task", "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + }, + "PutHuggingFaceRequestExample2": { + "description": "Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.", + "summary": "A rerank task", 
+ "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 3\n }\n}" } }, "inherits": { @@ -157886,7 +157940,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L30-L113" }, { "kind": "response", @@ -158573,6 +158627,16 @@ "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", "summary": "Rerank task", "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" + }, + "RerankRequestExample2": { + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "summary": "Rerank task", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": false,\n \"top_n\": 2\n}" + }, + "RerankRequestExample3": { + "description": "Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face", + "summary": "Rerank task", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\",\n \"return_documents\": true,\n \"top_n\": 3\n}" } }, "inherits": { @@ -158633,6 +158697,16 @@ "description": "A successful response from `POST _inference/rerank/cohere_rerank`.\n", "summary": "Rerank task", "value": "{\n \"rerank\": [\n {\n \"index\": \"2\",\n \"relevance_score\": \"0.011597361\",\n \"text\": \"leia\"\n },\n {\n \"index\": \"0\",\n \"relevance_score\": \"0.006338922\",\n \"text\": \"luke\"\n },\n {\n \"index\": \"5\",\n \"relevance_score\": \"0.0016166499\",\n \"text\": 
\"star\"\n },\n {\n \"index\": \"4\",\n \"relevance_score\": \"0.0011695103\",\n \"text\": \"r2d2\"\n },\n {\n \"index\": \"1\",\n \"relevance_score\": \"5.614787E-4\",\n \"text\": \"like\"\n },\n {\n \"index\": \"6\",\n \"relevance_score\": \"3.7850367E-4\",\n \"text\": \"wars\"\n },\n {\n \"index\": \"3\",\n \"relevance_score\": \"1.2508839E-5\",\n \"text\": \"chewy\"\n }\n ]\n}" + }, + "RerankResponseExample2": { + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "summary": "Rerank task", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794\n }\n ]\n}" + }, + "RerankResponseExample3": { + "description": "A successful response from `POST _inference/rerank/bge-reranker-base-mkn`.\n", + "summary": "Rerank task", + "value": "{\n \"rerank\": [\n {\n \"index\": 6,\n \"relevance_score\": 0.50955844,\n \"text\": \"wars\"\n },\n {\n \"index\": 5,\n \"relevance_score\": 0.084341794,\n \"text\": \"star\"\n },\n {\n \"index\": 3,\n \"relevance_score\": 0.004520818,\n \"text\": \"chewy\"\n }\n ]\n}" } }, "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index dddb8a1afb..a9aa88d4c0 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13496,7 +13496,12 @@ export interface InferenceHuggingFaceServiceSettings { export type InferenceHuggingFaceServiceType = 'hugging_face' -export type InferenceHuggingFaceTaskType = 'chat_completion' | 'completion' | 'text_embedding' +export interface InferenceHuggingFaceTaskSettings { + return_documents?: boolean + top_n?: integer +} + +export type InferenceHuggingFaceTaskType = 'chat_completion' | 'completion' | 'rerank' | 'text_embedding' export interface InferenceInferenceChunkingSettings { max_chunk_size?: integer @@ -13888,6 +13893,7 @@ export interface InferencePutHuggingFaceRequest extends RequestBase { chunking_settings?: 
InferenceInferenceChunkingSettings service: InferenceHuggingFaceServiceType service_settings: InferenceHuggingFaceServiceSettings + task_settings?: InferenceHuggingFaceTaskSettings } } diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 8a2f64030e..04335c0bab 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -979,9 +979,22 @@ export class HuggingFaceServiceSettings { model_id?: string } +export class HuggingFaceTaskSettings { + /** + * For a `rerank` task, return doc text within the results. + */ + return_documents?: boolean + /** + * For a `rerank` task, the number of most relevant documents to return. + * It defaults to the number of the documents. + */ + top_n?: integer +} + export enum HuggingFaceTaskType { chat_completion, completion, + rerank, text_embedding } diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts index ac8dd49bc4..f62d6c4e14 100644 --- a/specification/inference/put/PutRequest.ts +++ b/specification/inference/put/PutRequest.ts @@ -40,7 +40,7 @@ import { TaskType } from '@inference/_types/TaskType' * * ELSER (`sparse_embedding`) * * Google AI Studio (`completion`, `text_embedding`) * * Google Vertex AI (`rerank`, `text_embedding`) - * * Hugging Face (`chat_completion`, `completion`, `text_embedding`) + * * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`) * * Mistral (`chat_completion`, `completion`, `text_embedding`) * * OpenAI (`chat_completion`, `completion`, `text_embedding`) * * VoyageAI (`text_embedding`, `rerank`) diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 6b4ebea972..9181c42149 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -22,6 
+22,7 @@ import { Id } from '@_types/common' import { HuggingFaceServiceSettings, HuggingFaceServiceType, + HuggingFaceTaskSettings, HuggingFaceTaskType } from '@inference/_types/CommonTypes' import { InferenceChunkingSettings } from '@inference/_types/Services' @@ -56,6 +57,16 @@ import { InferenceChunkingSettings } from '@inference/_types/Services' * * `Mistral-7B-Instruct-v0.2` * * `QwQ-32B` * * `Phi-3-mini-128k-instruct` + * + * For Elastic's `rerank` task: + * The selected model must support the `sentence-ranking` task and expose OpenAI API. + * HuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far. + * After the endpoint is initialized, copy the full endpoint URL for use. + * Tested models for `rerank` task: + * + * * `bge-reranker-base` + * * `jina-reranker-v1-turbo-en-GGUF` + * * @rest_spec_name inference.put_hugging_face * @availability stack since=8.12.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public @@ -93,5 +104,10 @@ export interface Request extends RequestBase { * Settings used to install the inference model. These settings are specific to the `hugging_face` service. */ service_settings: HuggingFaceServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: HuggingFaceTaskSettings } } diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml new file mode 100644 index 0000000000..b525dbeabe --- /dev/null +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml @@ -0,0 +1,16 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type. 
+# method_request: "PUT _inference/rerank/hugging-face-rerank" +# type: "request" +value: |- + { + "service": "hugging_face", + "service_settings": { + "api_key": "hugging-face-access-token", + "url": "url-endpoint" + }, + "task_settings": { + "return_documents": true, + "top_n": 3 + } + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample2.yaml b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml new file mode 100644 index 0000000000..4489ae9045 --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": false, + "top_n": 2 + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample3.yaml b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml new file mode 100644 index 0000000000..f7cca4324d --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": true, + "top_n": 3 + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample2.yaml b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml new file mode 100644 index 0000000000..98af71f40a --- /dev/null +++ 
b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml @@ -0,0 +1,18 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. +# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844 + }, + { + "index": 5, + "relevance_score": 0.084341794 + } + ] + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample3.yaml b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml new file mode 100644 index 0000000000..a8790df956 --- /dev/null +++ b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml @@ -0,0 +1,25 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. +# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844, + "text": "wars" + }, + { + "index": 5, + "relevance_score": 0.084341794, + "text": "star" + }, + { + "index": 3, + "relevance_score": 0.004520818, + "text": "chewy" + } + ] + }