import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# CompactifAI
https://docs.compactif.ai/

CompactifAI offers highly compressed versions of leading language models, delivering up to **70% lower inference costs**, **4x throughput gains**, and **low-latency inference** with minimal quality loss (<5%). CompactifAI's OpenAI-compatible API makes integration straightforward, enabling developers to build ultra-efficient, scalable AI applications with superior concurrency and resource efficiency.

| Property | Details |
|-------|-------|
| Description | CompactifAI offers compressed versions of leading language models with up to 70% cost reduction and 4x throughput gains |
| Provider Route on LiteLLM | `compactifai/` (add this prefix to the model name - e.g. `compactifai/cai-llama-3-1-8b-slim`) |
| Provider Doc | [CompactifAI ↗](https://docs.compactif.ai/) |
| API Endpoint for Provider | https://api.compactif.ai/v1 |
| Supported Endpoints | `/chat/completions`, `/completions` |

## Supported OpenAI Parameters

CompactifAI is fully OpenAI-compatible and supports the following parameters:

```
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"presence_penalty",
"frequency_penalty",
"logit_bias",
"user",
"response_format",
"seed",
"tools",
"tool_choice",
"parallel_tool_calls",
"extra_headers"
```
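
These parameters are passed straight through `litellm.completion`. As a minimal sketch, the call below forwards `response_format`, `seed`, and `user` to CompactifAI; whether a given compressed model fully honors every parameter (e.g. strict JSON output) is up to the provider:

```python
from litellm import completion
import os

os.environ["COMPACTIFAI_API_KEY"] = "your-api-key"

# request JSON-formatted, seeded output and tag the request with a user id
response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[{"role": "user", "content": "Return a JSON object with a 'greeting' field."}],
    response_format={"type": "json_object"},
    seed=42,
    user="demo-user",
)
print(response.choices[0].message.content)
```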

## API Key Setup

CompactifAI API keys are available through AWS Marketplace subscription:

1. Subscribe via [AWS Marketplace](https://aws.amazon.com/marketplace)
2. Complete subscription verification (24-hour review process)
3. Access the MultiverseIAM dashboard with the provided credentials
4. Retrieve your API key from the dashboard

```python
import os

os.environ["COMPACTIFAI_API_KEY"] = "your-api-key"
```

## Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

os.environ['COMPACTIFAI_API_KEY'] = "your-api-key"

response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[
        {"role": "user", "content": "Hello from LiteLLM!"}
    ],
)
print(response)
```

</TabItem>
| 75 | +<TabItem value="proxy" label="Proxy"> |
| 76 | + |
| 77 | +```yaml |
| 78 | +model_list: |
| 79 | + - model_name: llama-2-compressed |
| 80 | + litellm_params: |
| 81 | + model: compactifai/cai-llama-3-1-8b-slim |
| 82 | + api_key: os.environ/COMPACTIFAI_API_KEY |
| 83 | +``` |
| 84 | +
|
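
To use this config, start the proxy with `litellm --config config.yaml` and point any OpenAI client at it. A minimal sketch using the `openai` Python SDK, assuming the proxy runs on the default port 4000 and the `llama-3.1-compressed` alias defined above:

```python
import openai

# the proxy holds the CompactifAI key; any placeholder key works here
# unless a master_key is configured on the proxy
client = openai.OpenAI(api_key="sk-anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="llama-3.1-compressed",
    messages=[{"role": "user", "content": "Hello from the LiteLLM proxy!"}],
)
print(response.choices[0].message.content)
```
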
| 85 | +</TabItem> |
| 86 | +</Tabs> |

## Streaming

```python
from litellm import completion
import os

os.environ['COMPACTIFAI_API_KEY'] = "your-api-key"

response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[
        {"role": "user", "content": "Write a short story"}
    ],
    stream=True
)

for chunk in response:
    print(chunk)
```
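
Each chunk follows the OpenAI streaming format, so instead of printing raw chunks you can collect just the generated text from `choices[0].delta.content`. A short sketch, assuming `COMPACTIFAI_API_KEY` is already set in the environment:

```python
from litellm import completion

response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[{"role": "user", "content": "Write a short story"}],
    stream=True,
)

# accumulate only the generated text from each chunk's delta
story = ""
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta:
        story += delta
print(story)
```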

## Advanced Usage

### Custom Parameters

```python
from litellm import completion

response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[{"role": "user", "content": "Explain quantum computing"}],
    temperature=0.7,
    max_tokens=500,
    top_p=0.9,
    stop=["Human:", "AI:"]
)
```

### Function Calling

CompactifAI supports OpenAI-compatible function calling:

```python
from litellm import completion

functions = [
    {
        "name": "get_weather",
        "description": "Get current weather information",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state"
                }
            },
            "required": ["location"]
        }
    }
]

response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
    tools=[{"type": "function", "function": f} for f in functions],
    tool_choice="auto"
)
```
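
When the model decides to call the function, the call is returned on the response message in the standard OpenAI shape. Continuing from the example above, a minimal sketch of reading it (assumes the model actually returned a tool call rather than a plain answer):

```python
import json

tool_calls = response.choices[0].message.tool_calls
if tool_calls:
    call = tool_calls[0]
    # arguments arrive as a JSON string, e.g. {"location": "San Francisco, CA"}
    args = json.loads(call.function.arguments)
    print(call.function.name, args)
```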

### Async Usage

```python
import asyncio
from litellm import acompletion

async def async_call():
    response = await acompletion(
        model="compactifai/cai-llama-3-1-8b-slim",
        messages=[{"role": "user", "content": "Hello async world!"}]
    )
    return response

# Run async function
response = asyncio.run(async_call())
print(response)
```

## Available Models

CompactifAI offers compressed versions of popular models. Use the `/models` endpoint to get the latest list:

```python
import os

import httpx

headers = {"Authorization": f"Bearer {os.environ['COMPACTIFAI_API_KEY']}"}
response = httpx.get("https://api.compactif.ai/v1/models", headers=headers)
models = response.json()
```

Common model formats:
- `compactifai/cai-llama-3-1-8b-slim`
- `compactifai/mistral-7b-compressed`
- `compactifai/codellama-7b-compressed`

## Benefits

- **Cost Efficient**: Up to 70% lower inference costs compared to standard models
- **High Performance**: 4x throughput gains with minimal quality loss (<5%)
- **Low Latency**: Optimized for fast response times
- **Drop-in Replacement**: Full OpenAI API compatibility
- **Scalable**: Superior concurrency and resource efficiency

## Error Handling

CompactifAI returns standard OpenAI-compatible error responses:

```python
from litellm import completion
from litellm.exceptions import AuthenticationError, RateLimitError

try:
    response = completion(
        model="compactifai/cai-llama-3-1-8b-slim",
        messages=[{"role": "user", "content": "Hello"}]
    )
except AuthenticationError:
    print("Invalid API key")
except RateLimitError:
    print("Rate limit exceeded")
```
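
For transient failures such as rate limits, LiteLLM can also retry the request for you via its `num_retries` argument. A small sketch:

```python
from litellm import completion

# retry up to 3 times on transient errors before raising
response = completion(
    model="compactifai/cai-llama-3-1-8b-slim",
    messages=[{"role": "user", "content": "Hello"}],
    num_retries=3,
)
```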

## Support

- Documentation: https://docs.compactif.ai/
- LinkedIn: [MultiverseComputing](https://www.linkedin.com/company/multiversecomputing)
- Analysis: [Artificial Analysis Provider Comparison](https://artificialanalysis.ai/providers/compactifai)