Commit 8f5df2a (1 parent: b52a1db)

updated model configuration in routing. (#20)

* updated model configuration in routing; added the option to include model lists in .env and updated .env.example as well
* fallback json fix
* moved model config to toml as suggested
* added an env variable to override model config toml path
* added pydantic models for LLM model config and routing params
* updated setup info with model config in README
* minor visual fix to README
* fix tests looking for config file
* update yml to work with env changes

File tree: 10 files changed (+207, −60 lines)

.env.example (3 additions, 0 deletions)

```diff
@@ -1,3 +1,6 @@
+# override this to your own model config toml
+LITELLM_CONFIG_PATH=model.config.toml
+
 # Azure OpenAI Configuration (Legacy)
 AZURE_OPENAI_MODEL=your_model_name_here # e.g., o3-mini-deep-research
 AZURE_OPENAI_ENDPOINT=https://your-resource-name.cognitiveservices.azure.com/
```

.github/workflows/tests.yml (1 addition, 0 deletions)

```diff
@@ -22,6 +22,7 @@ jobs:
     env:
       X_API_KEY: "some-api-key"
       IS_PROD: "true"
+      LITELLM_DEFAULT_MODEL_GROUP: ${{ secrets.LITELLM_DEFAULT_MODEL_GROUP }}

     steps:
       - name: Checkout repository
```

.gitignore (2 additions, 0 deletions)

```diff
@@ -2,6 +2,8 @@ attachments/**

 results/**

+model.config.toml
+
 # Python
 __pycache__/
 *.py[cod]
```

README.md (31 additions, 6 deletions)

````diff
@@ -105,16 +105,12 @@ poetry run dramatiq mxtoai.tasks --watch ./.
 Copy the `.env.example` file to `.env` and update with your specific configuration:

 ```env
+LITELLM_CONFIG_PATH=model.config.toml
+
 # Redis configuration
 REDIS_HOST=localhost
 REDIS_PORT=6379

-# Model configuration
-MODEL_ENDPOINT=your_azure_openai_endpoint
-MODEL_API_KEY=your_azure_openai_api_key
-MODEL_NAME=your-azure-openai-model-deployment
-MODEL_API_VERSION=2025-01-01-preview
-
 # Optional for research functionality
 JINA_API_KEY=your-jina-api-key

@@ -126,6 +122,35 @@ AZURE_VISION_KEY=your-azure-vision-key
 SERPAPI_API_KEY=your-serpapi-api-key
 ```

+This project supports load balancing and routing across multiple models, so you can define as many models as you'd like. Copy `model.config.example.toml` to a new TOML file and update it with your preferred configuration, then set `LITELLM_CONFIG_PATH` in `.env` to the path of your TOML file relative to the project root.
+
+A sample configuration looks like this:
+
+```toml
+[[model]]
+model_name = "gpt-4"
+
+[model.litellm_params]
+model = "azure/gpt-4"
+base_url = "https://your-endpoint.openai.azure.com"
+api_key = "your-key"
+api_version = "2023-05-15"
+weight = 5
+```
+
+It is also recommended to set the router configuration; if unset, it defaults to the config below:
+
+```toml
+[router_config]
+routing_strategy = "simple-shuffle"
+
+[[router_config.fallbacks]]
+gpt-4 = ["gpt-4-reasoning"]
+
+[router_config.default_litellm_params]
+drop_params = true
+```
+
 ## API Endpoints

 ### Process Email
````
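The `simple-shuffle` routing strategy mentioned in the README distributes requests across deployments in proportion to their configured `weight`. A minimal stdlib sketch of weight-proportional selection (a hypothetical helper, not LiteLLM's actual implementation):

```python
import random

def pick_deployment(deployments: list[dict], rng: random.Random) -> dict:
    """Pick one deployment, with probability proportional to its configured weight."""
    weights = [d["litellm_params"].get("weight", 1) for d in deployments]
    return rng.choices(deployments, weights=weights, k=1)[0]

# Two "gpt-4" deployments with weight 5 and one reasoning model with weight 1,
# mirroring the shape of the sample config above.
deployments = [
    {"model_name": "gpt-4", "litellm_params": {"weight": 5}},
    {"model_name": "gpt-4", "litellm_params": {"weight": 5}},
    {"model_name": "gpt-4-reasoning", "litellm_params": {"weight": 1}},
]
rng = random.Random(0)
picks = [pick_deployment(deployments, rng)["model_name"] for _ in range(1000)]
```

With these weights, roughly 10 of every 11 requests should land on a `gpt-4` deployment.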

model.config.example.toml (new file, 38 additions, 0 deletions)

```diff
@@ -0,0 +1,38 @@
+[[model]]
+model_name = "gpt-4"
+
+[model.litellm_params]
+model = "azure/gpt-4"
+base_url = "https://your-endpoint.openai.azure.com"
+api_key = "your-key"
+api_version = "2023-05-15"
+weight = 5
+
+[[model]]
+model_name = "gpt-4"
+
+[model.litellm_params]
+model = "azure/gpt-4-1106-preview"
+base_url = "https://your-endpoint-2.openai.azure.com"
+api_key = "your-key-2"
+api_version = "2023-05-15"
+weight = 5
+
+[[model]]
+model_name = "gpt-4-reasoning"
+
+[model.litellm_params]
+model = "azure/gpt-4o-mini"
+base_url = "https://your-endpoint-3.openai.azure.com"
+api_key = "your-key-3"
+api_version = "2023-05-15"
+weight = 1
+
+[router_config]
+routing_strategy = "simple-shuffle"
+
+[[router_config.fallbacks]]
+gpt-4 = ["gpt-4-reasoning"]
+
+[router_config.default_litellm_params]
+drop_params = true
```

mxtoai/exceptions.py (14 additions, 2 deletions)

```diff
@@ -1,7 +1,19 @@
 class UnspportedHandleException(Exception):
-    def __init__(self, message):
+    def __init__(self, message: str):
         super().__init__(message)

 class HandleAlreadyExistsException(Exception):
-    def __init__(self, message):
+    def __init__(self, message: str):
+        super().__init__(message)
+
+class EnvironmentVariableNotFoundException(Exception):
+    def __init__(self, message: str):
+        super().__init__(message)
+
+class ModelListNotFoundException(Exception):
+    def __init__(self, message: str):
+        super().__init__(message)
+
+class ModelConfigFileNotFoundException(Exception):
+    def __init__(self, message: str):
         super().__init__(message)
```

mxtoai/models.py (18 additions, 2 deletions)

```diff
@@ -1,11 +1,11 @@
-from typing import Optional
+from typing import Optional, Any, Dict, List

 from pydantic import BaseModel


 class ProcessingInstructions(BaseModel):
     handle: str
-    aliases: list[str]
+    aliases: List[str]
     process_attachments: bool
     deep_research_mandatory: bool
     rejection_message: Optional[str] = (
@@ -18,3 +18,19 @@ class ProcessingInstructions(BaseModel):
     requires_schedule_extraction: bool = False
     target_model: Optional[str] = "gpt-4"
     output_instructions: Optional[str] = None
+
+class LiteLLMParams(BaseModel):
+    model: str
+    base_url: str
+    api_key: str
+    api_version: str
+    weight: int
+
+class ModelConfig(BaseModel):
+    model_name: str
+    litellm_params: LiteLLMParams
+
+class RouterConfig(BaseModel):
+    routing_strategy: str
+    fallbacks: List[Dict[str, List[str]]]
+    default_litellm_params: Dict[str, Any]
```
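The shape these pydantic models enforce can be illustrated without pulling in pydantic itself; a stdlib dataclass sketch of the same structure (no field validation, purely to show how a parsed `[[model]]` entry maps onto `ModelConfig`):

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class LiteLLMParams:
    model: str
    base_url: str
    api_key: str
    api_version: str
    weight: int

@dataclass
class ModelConfig:
    model_name: str
    litellm_params: LiteLLMParams

@dataclass
class RouterConfig:
    routing_strategy: str
    fallbacks: list[dict[str, list[str]]]
    default_litellm_params: dict[str, Any]

# One parsed [[model]] entry, as the TOML loader would return it
entry = {
    "model_name": "gpt-4",
    "litellm_params": {
        "model": "azure/gpt-4",
        "base_url": "https://your-endpoint.openai.azure.com",
        "api_key": "your-key",
        "api_version": "2023-05-15",
        "weight": 5,
    },
}
cfg = ModelConfig(model_name=entry["model_name"],
                  litellm_params=LiteLLMParams(**entry["litellm_params"]))
```

The pydantic versions additionally coerce and validate field types at construction time, which the dataclass sketch does not.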

mxtoai/routed_litellm_model.py (86 additions, 49 deletions)

```diff
@@ -1,9 +1,12 @@
 import os
-from typing import Any, Optional
+from typing import Any, Optional, List, Dict

+import toml
 from dotenv import load_dotenv
 from smolagents import ChatMessage, LiteLLMRouterModel, Tool

+import mxtoai.models as models
+import mxtoai.exceptions as exceptions
 from mxtoai._logging import get_logger
 from mxtoai.models import ProcessingInstructions

@@ -25,64 +28,98 @@ def __init__(self, current_handle: Optional[ProcessingInstructions] = None, **kw
         """
         self.current_handle = current_handle
+        self.config_path = os.getenv("LITELLM_CONFIG_PATH", "model.config.example.toml")
+        self.config = self._load_toml_config()

         # Configure model list from environment variables
-        model_list = [
-            {
-                "model_name": "gpt-4",
-                "litellm_params": {
-                    "model": f"azure/{os.getenv('GPT4O_1_NAME')}",
-                    "base_url": os.getenv("GPT4O_1_ENDPOINT"),
-                    "api_key": os.getenv("GPT4O_1_API_KEY"),
-                    "api_version": os.getenv("GPT4O_1_API_VERSION"),
-                    "weight": int(os.getenv("GPT4O_1_WEIGHT", 5)),
-                },
-            },
-            {
-                "model_name": "gpt-4",
-                "litellm_params": {
-                    "model": f"azure/{os.getenv('GPT41_MINI_NAME')}",
-                    "base_url": os.getenv("GPT41_MINI_ENDPOINT"),
-                    "api_key": os.getenv("GPT41_MINI_API_KEY"),
-                    "api_version": os.getenv("GPT41_MINI_API_VERSION"),
-                    "weight": int(os.getenv("GPT41_MINI_WEIGHT", 5)),
-                },
-            },
-            {
-                "model_name": "gpt-4-reasoning",
-                "litellm_params": {
-                    "model": f"azure/{os.getenv('O3_MINI_NAME')}",
-                    "api_base": os.getenv("O3_MINI_ENDPOINT"),
-                    "api_key": os.getenv("O3_MINI_API_KEY"),
-                    "api_version": os.getenv("O3_MINI_API_VERSION"),
-                    "weight": int(os.getenv("O3_MINI_WEIGHT", 1)),
-                },
-            },
-        ]
-
-        client_router_kwargs = {
-            "routing_strategy": "simple-shuffle",
-            "fallbacks": [
-                {
-                    "gpt-4": ["gpt-4-reasoning"]  # Fallback to reasoning model if both GPT-4 instances fail
-                }
-            ],
-            # "set_verbose": True,
-            # "debug_level": "DEBUG",
-            "default_litellm_params": {"drop_params": True},  # Global setting for dropping unsupported parameters
-        }
-
+        model_list = self._load_model_config()
+        client_router_kwargs = self._load_router_config()
+
         # The model_id for LiteLLMRouterModel is the default model group the router will target.
         # Our _get_target_model() will override this per call via the 'model' param in generate().
-        default_model_group = "gpt-4"
+        default_model_group = os.getenv("LITELLM_DEFAULT_MODEL_GROUP")
+
+        if not default_model_group:
+            raise exceptions.EnvironmentVariableNotFoundException(
+                "LITELLM_DEFAULT_MODEL_GROUP environment variable not found. Please set it to the default model group."
+            )

         super().__init__(
             model_id=default_model_group,
-            model_list=model_list,
-            client_kwargs=client_router_kwargs,
+            model_list=[model.dict() for model in model_list],
+            client_kwargs=client_router_kwargs.dict(),
             **kwargs,  # Pass through other LiteLLMModel/Model kwargs
         )

+    def _load_toml_config(self) -> Dict[str, Any]:
+        """
+        Load configuration from a TOML file.
+
+        Returns:
+            Dict[str, Any]: Configuration loaded from the TOML file.
+        """
+
+        if not os.path.exists(self.config_path):
+            raise exceptions.ModelConfigFileNotFoundException(
+                f"Model config file not found at {self.config_path}. Please check the path."
+            )
+
+        try:
+            with open(self.config_path, "r") as f:
+                return toml.load(f)
+        except Exception as e:
+            logger.error(f"Failed to load TOML config: {e}")
+            return {}
+
+    def _load_model_config(self) -> List[Dict[str, Any]]:
+        """
+        Load model configuration from the TOML config.
+
+        Returns:
+            List[Dict[str, Any]]: List of model configurations.
+
+        """
+        model_entries = self.config.get("model", [])
+        model_list = []
+
+        if isinstance(model_entries, dict):
+            # In case there's only one model (TOML parser returns dict)
+            model_entries = [model_entries]
+
+        for entry in model_entries:
+            model_list.append(models.ModelConfig(
+                model_name=entry.get("model_name"),
+                litellm_params=models.LiteLLMParams(
+                    **entry.get("litellm_params")
+                )
+            ))
+
+        if not model_list:
+            raise exceptions.ModelListNotFoundException(
+                "No model list found in config toml. Please check the configuration."
+            )
+
+        return model_list
+
+    def _load_router_config(self) -> models.RouterConfig:
+        """
+        Load router configuration from the TOML config.
+
+        Returns:
+            models.RouterConfig: Router configuration
+        """
+        router_config = models.RouterConfig(**self.config.get("router_config"))
+
+        if not router_config:
+            logger.warning("No router config found in model-config.toml. Using defaults.")
+            return models.RouterConfig(
+                routing_strategy="simple-shuffle",
+                fallbacks=[],
+                default_litellm_params={"drop_params": True},
+            )
+        return router_config
+
+
     def _get_target_model(self) -> str:
         """
         Determine which model to route to based on the current handle configuration.
```
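Two behaviours in the config-loading code above are worth illustrating in isolation: a lone `[[model]]` entry can come back from the TOML parser as a dict rather than a list and must be normalised, and the `fallbacks` structure maps a model group to the groups tried next. A stdlib sketch with hypothetical helper names (not the class's actual methods):

```python
from typing import Any

def normalize_model_entries(entries: Any) -> list[dict]:
    """A single table can parse as a bare dict; wrap it so callers always see a list."""
    if isinstance(entries, dict):
        return [entries]
    return list(entries)

def fallback_groups(fallbacks: list[dict[str, list[str]]], group: str) -> list[str]:
    """Return the fallback model groups configured for `group`, or [] if none."""
    for mapping in fallbacks:
        if group in mapping:
            return mapping[group]
    return []

# A single model entry arriving as a dict is wrapped into a one-element list
entries = normalize_model_entries({"model_name": "gpt-4"})

# The config's [[router_config.fallbacks]] entry, as parsed: gpt-4 falls back to gpt-4-reasoning
groups = fallback_groups([{"gpt-4": ["gpt-4-reasoning"]}], "gpt-4")
```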

poetry.lock (13 additions, 1 deletion) — generated file, diff not rendered.

pyproject.toml (1 addition, 0 deletions)

```diff
@@ -44,6 +44,7 @@ dependencies = [
     "jinja2 (>=3.1.6,<4.0.0)",
     "pydantic[email] (>=2.11.4,<3.0.0)",
     "python-multipart (>=0.0.20,<0.0.21)",
+    "toml (>=0.10.2,<0.11.0)",
 ]

 [tool.ruff]
```
