diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md index 3a1b55ecb8..b1af63fa0c 100644 --- a/docs/source/en/guides/inference.md +++ b/docs/source/en/guides/inference.md @@ -4,7 +4,7 @@ rendered properly in your Markdown viewer. # Run Inference on servers -Inference is the process of using a trained model to make predictions on new data. Because this process can be compute-intensive, running on a dedicated or external service can be an interesting option. +Inference is the process of using a trained model to make predictions on new data. Because this process can be compute-intensive, running on a dedicated or external service can be an interesting option. The `huggingface_hub` library provides a unified interface to run inference across multiple services for models hosted on the Hugging Face Hub: 1. [HF Inference API](https://huggingface.co/docs/api-inference/index): a serverless solution that allows you to run model inference on Hugging Face's infrastructure for free. This service is a fast way to get started, test different models, and prototype AI products. @@ -121,8 +121,8 @@ What if you want to use a specific model? You can specify it either as a paramet -When using the Hugging Face Inference API (default provider), each task comes with a recommended model from the 200k+ models available on the Hub. -However, this recommendation can change over time, so it's best to explicitly set a model once you've decided which one to use. +When using the Hugging Face Inference API (default provider), each task comes with a recommended model from the 200k+ models available on the Hub. +However, this recommendation can change over time, so it's best to explicitly set a model once you've decided which one to use. For third-party providers, you must always specify a model that is compatible with that provider. 
Visit the [Models](https://huggingface.co/models?inference=warm) page on the Hub to explore models available through the Inference API, or check the provider's documentation for their supported models. @@ -176,7 +176,7 @@ using our provider keys, and the usage will be billed directly to your Hugging F ```python >>> client = InferenceClient( provider="replicate", - token="hf_****" # Your HF token + token="hf_****" # Your HF token ) ``` @@ -248,36 +248,36 @@ You might wonder why using [`InferenceClient`] instead of OpenAI's client? There [`InferenceClient`]'s goal is to provide the easiest interface to run inference on Hugging Face models, on any provider. It has a simple API that supports the most common tasks. Here is a table showing which providers support which tasks: -| Domain | Task | Black Forest Labs | HF Inference | fal-ai | Fireworks AI | Hyperbolic | Nebius AI Studio | Novita AI | Replicate | Sambanova | Together | -| ------------------- | --------------------------------------------------- | ---------------- | ------------ | ------ | --------- | ---------- | ---------------- | ------ | --------- | --------- | ----------- | -| **Audio** | [`~InferenceClient.audio_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.audio_to_audio`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.automatic_speech_recognition`] | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.text_to_speech`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| **Computer Vision** | [`~InferenceClient.image_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.image_segmentation`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.image_to_image`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.image_to_text`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.object_detection`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.text_to_image`] 
| ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | -| | [`~InferenceClient.text_to_video`] | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| | [`~InferenceClient.zero_shot_image_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Multimodal** | [`~InferenceClient.document_question_answering`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.visual_question_answering`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **NLP** | [`~InferenceClient.chat_completion`] | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | -| | [`~InferenceClient.feature_extraction`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.fill_mask`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.question_answering`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.sentence_similarity`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.summarization`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.table_question_answering`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.text_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.text_generation`] | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | -| | [`~InferenceClient.token_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.translation`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.zero_shot_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| **Tabular** | [`~InferenceClient.tabular_classification`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| | [`~InferenceClient.tabular_regression`] | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Domain | Task | Black Forest Labs | Cohere | fal-ai | Fireworks AI | HF Inference | Hyperbolic | Nebius AI Studio | Novita AI | Replicate | Sambanova | Together | +| ------------------- | --------------------------------------------------- | ----------------- | ------ | ------ | ------------ | ------------ | ---------- | 
---------------- | --------- | --------- | --------- | -------- | +| **Audio** | [`~InferenceClient.audio_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.audio_to_audio`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.automatic_speech_recognition`] | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.text_to_speech`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| **Computer Vision** | [`~InferenceClient.image_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.image_segmentation`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.image_to_image`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.image_to_text`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.object_detection`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.text_to_image`] | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | +| | [`~InferenceClient.text_to_video`] | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| | [`~InferenceClient.zero_shot_image_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Multimodal** | [`~InferenceClient.document_question_answering`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.visual_question_answering`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **NLP** | [`~InferenceClient.chat_completion`] | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| | [`~InferenceClient.feature_extraction`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.fill_mask`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.question_answering`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.sentence_similarity`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.summarization`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.table_question_answering`] | ❌ | ❌ | ❌ 
| ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.text_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.text_generation`] | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | +| | [`~InferenceClient.token_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.translation`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.zero_shot_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **Tabular** | [`~InferenceClient.tabular_classification`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| | [`~InferenceClient.tabular_regression`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index eab4239dc4..3f7b57a96b 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -133,7 +133,7 @@ class InferenceClient: path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api) documentation for details). When passing a URL as `model`, the client will not append any suffix path to it. provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, "sambanova"` or `"together"`. + Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`. defaults to hf-inference (Hugging Face Serverless Inference API). If model is a URL or `base_url` is passed, then `provider` is not used. 
token (`str`, *optional*): diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index ddd8740037..8f9718e078 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -121,7 +121,7 @@ class AsyncInferenceClient: path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api) documentation for details). When passing a URL as `model`, the client will not append any suffix path to it. provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, "sambanova"` or `"together"`. + Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`. defaults to hf-inference (Hugging Face Serverless Inference API). If model is a URL or `base_url` is passed, then `provider` is not used. 
token (`str`, *optional*): diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py index 2a4a1ca715..cfb1a6985d 100644 --- a/src/huggingface_hub/inference/_providers/__init__.py +++ b/src/huggingface_hub/inference/_providers/__init__.py @@ -2,6 +2,7 @@ from ._common import TaskProviderHelper from .black_forest_labs import BlackForestLabsTextToImageTask +from .cohere import CohereConversationalTask from .fal_ai import ( FalAIAutomaticSpeechRecognitionTask, FalAITextToImageTask, @@ -20,6 +21,7 @@ PROVIDER_T = Literal[ "black-forest-labs", + "cohere", "fal-ai", "fireworks-ai", "hf-inference", @@ -35,6 +37,9 @@ "black-forest-labs": { "text-to-image": BlackForestLabsTextToImageTask(), }, + "cohere": { + "conversational": CohereConversationalTask(), + }, "fal-ai": { "automatic-speech-recognition": FalAIAutomaticSpeechRecognitionTask(), "text-to-image": FalAITextToImageTask(), diff --git a/src/huggingface_hub/inference/_providers/_common.py b/src/huggingface_hub/inference/_providers/_common.py index d2344b0782..58bfe5a830 100644 --- a/src/huggingface_hub/inference/_providers/_common.py +++ b/src/huggingface_hub/inference/_providers/_common.py @@ -17,6 +17,7 @@ # # Example: # "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct", + "cohere": {}, "fal-ai": {}, "fireworks-ai": {}, "hf-inference": {}, diff --git a/src/huggingface_hub/inference/_providers/cohere.py b/src/huggingface_hub/inference/_providers/cohere.py new file mode 100644 index 0000000000..a0143ac841 --- /dev/null +++ b/src/huggingface_hub/inference/_providers/cohere.py @@ -0,0 +1,15 @@ +from huggingface_hub.inference._providers._common import ( + BaseConversationalTask, +) + + +_PROVIDER = "cohere" +_BASE_URL = "https://api.cohere.com" + + +class CohereConversationalTask(BaseConversationalTask): + def __init__(self): + super().__init__(provider=_PROVIDER, base_url=_BASE_URL) + + def _prepare_route(self, mapped_model: str) -> str: + 
return "/compatibility/v1/chat/completions" diff --git a/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[cohere,conversational].yaml b/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[cohere,conversational].yaml new file mode 100644 index 0000000000..10c23ee695 --- /dev/null +++ b/tests/cassettes/TestInferenceClient.test_chat_completion_no_stream[cohere,conversational].yaml @@ -0,0 +1,102 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is deep learning?"}], "model": "command-r7b-12-2024", + "stream": false}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '181' + Content-Type: + - application/json + X-Amzn-Trace-Id: + - 204391c6-92c8-4214-a394-04b025f3e86a + method: POST + uri: https://api.cohere.com/compatibility/v1/chat/completions + response: + body: + string: '{"id":"3b5751bb-10a2-4fc8-95a0-d1e6cfa788b3","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"Deep + learning is a subfield of machine learning and artificial intelligence that + focuses on training artificial neural networks to learn and make predictions + from data. It is inspired by the structure and function of the human brain, + particularly the interconnected network of neurons.\n\nIn deep learning, artificial + neural networks are composed of multiple layers of interconnected nodes, or + \"neurons,\" which process and transform input data. These networks are designed + to automatically learn and extract hierarchical representations of data through + a process called \"training.\" The training process involves adjusting the + network''s internal parameters (weights and biases) to minimize the difference + between predicted and actual outputs.\n\nHere are some key characteristics + and concepts in deep learning:\n\n1. 
Neural Networks: Deep learning models + are primarily based on artificial neural networks, which are composed of layers + of nodes. These networks can have various architectures, such as convolutional + neural networks (CNNs) for image processing, recurrent neural networks (RNNs) + for sequential data, and transformer networks for natural language processing.\n\n2. + Deep Architecture: The term \"deep\" in deep learning refers to the depth + of the neural network, meaning it has multiple hidden layers between the input + and output layers. These hidden layers enable the network to learn complex + patterns and representations from the data.\n\n3. Learning and Training: Deep + learning models are trained using large amounts of labeled data and a process + called backpropagation. During training, the network adjusts its internal + parameters to minimize a loss function, which measures the difference between + predicted and actual outputs. This optimization process is typically done + using gradient descent or its variants.\n\n4. Feature Learning: One of the + key advantages of deep learning is its ability to automatically learn relevant + features from raw data. Unlike traditional machine learning, where feature + engineering is required, deep learning models can discover and extract features + at multiple levels of abstraction.\n\n5. Applications: Deep learning has been + applied to a wide range of tasks and domains, including image and speech recognition, + natural language processing, object detection, medical diagnosis, game playing + (e.g., AlphaGo), and autonomous driving.\n\nDeep learning has revolutionized + many areas of artificial intelligence due to its ability to handle complex + and large-scale data, learn hierarchical representations, and achieve state-of-the-art + performance in various tasks. 
It has driven significant advancements in areas + like computer vision, natural language understanding, and speech recognition."}}],"created":1740653732,"model":"command-r7b-12-2024","object":"chat.completion","usage":{"prompt_tokens":11,"completion_tokens":476,"total_tokens":487}}' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + Via: + - 1.1 google + access-control-expose-headers: + - X-Debug-Trace-ID + cache-control: + - no-cache, no-store, no-transform, must-revalidate, private, max-age=0 + content-type: + - application/json + date: + - Thu, 27 Feb 2025 10:55:32 GMT + expires: + - Thu, 01 Jan 1970 00:00:00 UTC + num_chars: + - '2831' + num_tokens: + - '487' + pragma: + - no-cache + server: + - envoy + vary: + - Origin + x-accel-expires: + - '0' + x-api-warning: + - Please set an API version, for more information please refer to https://docs.cohere.com/versioning-reference + - Version is deprecated, for more information please refer to https://docs.cohere.com/versioning-reference + x-debug-trace-id: + - 430c1e5519b95b094771bcc36304445e + x-envoy-upstream-service-time: + - '2740' + x-trial-endpoint-call-limit: + - '100' + x-trial-endpoint-call-remaining: + - '99' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[cohere,conversational].yaml b/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[cohere,conversational].yaml new file mode 100644 index 0000000000..e81f868916 --- /dev/null +++ b/tests/cassettes/TestInferenceClient.test_chat_completion_with_stream[cohere,conversational].yaml @@ -0,0 +1,146 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is deep learning?"}], "model": "command-r7b-12-2024", + "max_tokens": 20, "stream": true}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + 
Connection: + - keep-alive + Content-Length: + - '198' + Content-Type: + - application/json + X-Amzn-Trace-Id: + - 68c492d9-abbd-4d0a-8462-e598765021e4 + method: POST + uri: https://api.cohere.com/compatibility/v1/chat/completions + response: + body: + string: 'data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":"","role":"assistant"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":"Deep"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + learning"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + is"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + a"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + sub"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":"field"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + of"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + 
+ data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + machine"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + learning"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + and"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + artificial"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + intelligence"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + that"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + focuses"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + on"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + training"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: 
{"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + artificial"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + neural"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":null,"delta":{"content":" + networks"}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk"} + + + data: {"id":"2bb1b33e-53d9-4fae-8958-2e54c1e60f09","choices":[{"index":0,"finish_reason":"length","delta":{}}],"created":1740653733,"model":"command-r7b-12-2024","object":"chat.completion.chunk","usage":{"prompt_tokens":11,"completion_tokens":19,"total_tokens":30}} + + + data: [DONE] + + + ' + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Transfer-Encoding: + - chunked + Via: + - 1.1 google + access-control-expose-headers: + - X-Debug-Trace-ID + cache-control: + - no-cache, no-store, no-transform, must-revalidate, private, max-age=0 + content-type: + - text/event-stream + date: + - Thu, 27 Feb 2025 10:55:33 GMT + expires: + - Thu, 01 Jan 1970 00:00:00 UTC + pragma: + - no-cache + server: + - envoy + vary: + - Origin + x-accel-expires: + - '0' + x-api-warning: + - Please set an API version, for more information please refer to https://docs.cohere.com/versioning-reference + - Version is deprecated, for more information please refer to https://docs.cohere.com/versioning-reference + x-debug-trace-id: + - 4bc0ce4bda5305b5b60ef6268db5e3a7 + x-envoy-upstream-service-time: + - '88' + x-trial-endpoint-call-limit: + - '100' + x-trial-endpoint-call-remaining: + - '98' + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_inference_client.py b/tests/test_inference_client.py index 
07648ef618..f7a7e5d60b 100644 --- a/tests/test_inference_client.py +++ b/tests/test_inference_client.py @@ -63,6 +63,9 @@ "black-forest-labs": { "text-to-image": "black-forest-labs/FLUX.1-dev", }, + "cohere": { + "conversational": "CohereForAI/c4ai-command-r7b-12-2024", + }, "together": { "conversational": "meta-llama/Meta-Llama-3-8B-Instruct", "text-generation": "meta-llama/Llama-2-70b-hf", diff --git a/tests/test_inference_providers.py b/tests/test_inference_providers.py index 871abd5877..87bdc34094 100644 --- a/tests/test_inference_providers.py +++ b/tests/test_inference_providers.py @@ -9,6 +9,7 @@ recursive_merge, ) from huggingface_hub.inference._providers.black_forest_labs import BlackForestLabsTextToImageTask +from huggingface_hub.inference._providers.cohere import CohereConversationalTask from huggingface_hub.inference._providers.fal_ai import ( FalAIAutomaticSpeechRecognitionTask, FalAITextToImageTask, @@ -110,6 +111,24 @@ def test_get_response_success(self, mocker): ) +class TestCohereConversationalTask: + def test_prepare_url(self): + helper = CohereConversationalTask() + assert helper.task == "conversational" + url = helper._prepare_url("cohere_token", "username/repo_name") + assert url == "https://api.cohere.com/compatibility/v1/chat/completions" + + def test_prepare_payload_as_dict(self): + helper = CohereConversationalTask() + payload = helper._prepare_payload_as_dict( + [{"role": "user", "content": "Hello!"}], {}, "CohereForAI/command-r7b-12-2024" + ) + assert payload == { + "messages": [{"role": "user", "content": "Hello!"}], + "model": "CohereForAI/command-r7b-12-2024", + } + + class TestFalAIProvider: def test_prepare_headers_fal_ai_key(self): """When using direct call, must use Key authorization."""