Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ async def initialize_model_statuses():
)
new_model_model = st.text_input("Model")
new_model_url = st.text_input("URL")
new_model_api_key = st.text_input("API Key / Auth Token", type="password")
new_model_auth_token = st.text_input("Auth Token", type="password")
new_model_api_version = st.text_input("API Version")

if st.form_submit_button("➕ Add New Parameter"):
Expand Down Expand Up @@ -214,7 +214,7 @@ async def initialize_model_statuses():
"model_type": new_model_type,
"model": new_model_model,
"url": new_model_url,
"api_key": new_model_api_key,
"auth_token": new_model_auth_token,
"api_version": new_model_api_version,
**new_params,
"parameters": {
Expand Down
46 changes: 19 additions & 27 deletions apps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"Install them with: pip install 'sygra[ui]'"
)
import httpx
from openai import AsyncAzureOpenAI
from openai import AsyncAzureOpenAI, AsyncOpenAI
from mistralai_azure import MistralAzure
from mistralai_azure.utils.retries import RetryConfig, BackoffStrategy
import aiohttp
Expand All @@ -17,7 +17,7 @@ async def check_openai_status(session, model_name, model_data):
try:
client = AsyncAzureOpenAI(
azure_endpoint=model_data["url"],
api_key=model_data["api_key"],
api_key=model_data["auth_token"],
api_version=model_data["api_version"],
timeout=model_data.get("timeout", 10),
default_headers={"Connection": "close"},
Expand Down Expand Up @@ -110,32 +110,24 @@ async def check_tgi_status(session, model_name, model_data):

async def check_vllm_status(session, model_name, model_data):
try:
url = model_data["url"]
auth_token = model_data.get("auth_token", "").replace("Bearer ", "")
model_serving_name = model_data.get("model_serving_name", model_name)
client = AsyncOpenAI(
base_url=model_data["url"],
api_key=model_data["auth_token"],
timeout=model_data.get("timeout", 10),
default_headers={"Connection": "close"},
)

async with httpx.AsyncClient(
http1=True, verify=True, timeout=model_data.get("timeout", 10)
) as client:
headers = {
"Authorization": f"Bearer {auth_token}",
"Content-Type": "application/json",
"Connection": "close",
}
payload = {
"model": model_serving_name,
"prompt": "Hello!",
"max_tokens": 5,
"temperature": 0.1,
}

response = await client.post(url, json=payload, headers=headers)

if response.status_code == 200:
st.session_state["active_models"].append(model_name)
return model_name, True
else:
return model_name, False
# Sending test request
completion = await client.chat.completions.create(
model=model_data.get("model_serving_name", model_name),
messages=[{"role": "system", "content": "Hello!"}],
max_tokens=5,
temperature=0.1,
)

# If no exception, model is active
st.session_state["active_models"].append(model_name)
return model_name, True

except Exception as e:
return model_name, False
Expand Down
2 changes: 1 addition & 1 deletion docs/getting_started/model_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ SYGRA_MIXTRAL_8X7B_CHAT_TEMPLATE={% for m in messages %} ... {% endfor %}
| `ssl_verify` | *(Optional)* Verify SSL certificate (default: true) |
| `ssl_cert` | *(Optional)* Path to SSL certificate file |
> **Note:**
> - Do **not** include `url`, `auth_token`, or `api_key` in your YAML config. These are sourced from environment variables as described above.<br>
> - Do **not** include `url` or `auth_token` in your YAML config. These are sourced from environment variables as described above.<br>
> - If you want to set **ssl_verify** to **false** globally, you can set `ssl_verify:false` under `model_config` section in config/configuration.yaml
#### Customizable Model Parameters

Expand Down
10 changes: 5 additions & 5 deletions sygra/config/models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mistralai:
gpt4:
model_type: azure_openai
model: gpt-4-32k
# URL and api_key should be defined at .env file as SYGRA_GPT4_URL and SYGRA_GPT4_TOKEN
# URL and auth_token should be defined in the .env file as SYGRA_GPT4_URL and SYGRA_GPT4_TOKEN
api_version: 2024-05-01-preview
parameters:
max_tokens: 500
Expand All @@ -35,7 +35,7 @@ gpt4:
gpt-4o:
model_type: azure_openai
model: gpt-4o
# URL and api_key should be defined at .env file as SYGRA_GPT-4O_URL and SYGRA_GPT-4O_TOKEN
# URL and auth_token should be defined in the .env file as SYGRA_GPT-4O_URL and SYGRA_GPT-4O_TOKEN
api_version: 2024-02-15-preview
parameters:
max_tokens: 500
Expand All @@ -44,15 +44,15 @@ gpt-4o:
gpt-4o-mini:
model_type: azure_openai
model: gpt-4o-mini
# URL and api_key should be defined at .env file as SYGRA_GPT-4O-MINI_URL and SYGRA_GPT-4O-MINI_TOKEN
# URL and auth_token should be defined in the .env file as SYGRA_GPT-4O-MINI_URL and SYGRA_GPT-4O-MINI_TOKEN
api_version: 2024-08-01-preview
parameters:
max_tokens: 5000
temperature: 0.0001

#QWEN VL 72b deployed in vllm
qwen_vl_72b:
# URL and api_key should be defined at .env file as SYGRA_QWEN_VL_72B_URL and SYGRA_QWEN_VL_72B_TOKEN
# URL and auth_token should be defined in the .env file as SYGRA_QWEN_VL_72B_URL and SYGRA_QWEN_VL_72B_TOKEN
hf_chat_template_model_id: Qwen/Qwen2.5-VL-72B-Instruct
model_type: vllm
parameters:
Expand All @@ -61,7 +61,7 @@ qwen_vl_72b:

#QWEN 32B deployed in vllm
qwen3_32b:
# URL and api_key should be defined at .env file as SYGRA_QWEN3_32B_URL and SYGRA_QWEN3_32B_TOKEN
# URL and auth_token should be defined in the .env file as SYGRA_QWEN3_32B_URL and SYGRA_QWEN3_32B_TOKEN
model_serving_name: qwen3_32b
hf_chat_template_model_id: Qwen/Qwen3-32B
model_type: vllm
Expand Down
2 changes: 1 addition & 1 deletion sygra/core/models/client/client_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def _create_openai_azure_client(
"""
model_config = utils.get_updated_model_config(model_config)
utils.validate_required_keys(
["url", "api_key", "api_version", "model"], model_config, "model"
["url", "auth_token", "api_version", "model"], model_config, "model"
)
ssl_verify: bool = bool(model_config.get("ssl_verify", True))
ssl_cert = model_config.get("ssl_cert")
Expand Down
9 changes: 3 additions & 6 deletions sygra/core/models/custom_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,7 @@ def name(self) -> str:

def _get_model_params(self) -> ModelParams:
url = self.model_config.get("url", "")
if "auth_token" in self.model_config:
auth_token = self.model_config.get("auth_token", "")
else:
auth_token = self.model_config.get("api_key", "")
auth_token = self.model_config.get("auth_token", "")

return_url = None
return_auth_token = None
Expand Down Expand Up @@ -391,7 +388,7 @@ def ping(self) -> int:
if returns 200, its success
"""
url_obj = self.model_config.get("url")
auth_token = self.model_config.get("auth_token") or self.model_config.get("api_key")
auth_token = self.model_config.get("auth_token")
if isinstance(url_obj, list):
for i, url in enumerate(url_obj):
token = auth_token[i] if isinstance(auth_token, list) else auth_token
Expand Down Expand Up @@ -957,7 +954,7 @@ class CustomOpenAI(BaseCustomModel):
def __init__(self, model_config: dict[str, Any]) -> None:
super().__init__(model_config)
utils.validate_required_keys(
["url", "api_key", "api_version", "model"], model_config, "model"
["url", "auth_token", "api_version", "model"], model_config, "model"
)
self.model_config = model_config

Expand Down
5 changes: 1 addition & 4 deletions sygra/core/models/langgraph/sygra_base_chat_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,7 @@ def _get_model_params(self) -> ModelParams:
ModelParams: The model parameters.
"""
url = self._config["url"]
if "auth_token" in self._config:
auth_token = self._config["auth_token"]
else:
auth_token = self._config["api_key"]
auth_token = self._config["auth_token"]

return_url = None
return_auth_token = None
Expand Down
11 changes: 5 additions & 6 deletions sygra/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,11 @@ def load_model_config(config_path: Optional[str] = None) -> Any:

# Look for auth token/API key in environment variables
if token := os.environ.get(f"{env_prefix}_TOKEN"):
# Determine whether to use auth_token or api_key based on model_type
model_type = config.get("model_type", "").lower()

# OpenAI models use api_key, others use auth_token
if model_type == "azure_openai":
config["api_key"] = token
# Check if it contains the list separator (indicating a list of Auth Tokens)
if constants.LIST_SEPARATOR in token:
# Split by the separator and filter out any empty strings
token_list = [t for t in token.split(constants.LIST_SEPARATOR) if t]
config["auth_token"] = token_list
else:
config["auth_token"] = token

Expand Down
4 changes: 2 additions & 2 deletions tests/core/graph/test_graph_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,15 +472,15 @@ def mock_conditional_subgraph_config():
def mock_model_config():
return {
"gpt-4o": {
"api_key": "dummy-keys",
"auth_token": "dummy-keys",
"api_version": "2024-02-15-preview",
"model": "gpt-4o",
"model_type": "azure_openai",
"parameters": {"max_tokens": 500, "temperature": 1.0},
"url": "https://test-url.com/",
},
"gpt4": {
"api_key": "dummy-keys",
"auth_token": "dummy-keys",
"api_version": "2024-05-01-preview",
"model": "gpt-4-32k",
"model_type": "azure_openai",
Expand Down
12 changes: 6 additions & 6 deletions tests/core/models/client/test_client_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def test_create_openai_azure_client(self, mock_validate, mock_openai_client):
model_config = {
"model_type": "azure_openai",
"url": model_url,
"api_key": auth_token,
"auth_token": auth_token,
"api_version": "2023-05-15",
"model": "gpt-4",
"timeout": 90,
Expand All @@ -216,7 +216,7 @@ def test_create_openai_azure_client(self, mock_validate, mock_openai_client):

# Verify the client was created with the right parameters
mock_validate.assert_called_once_with(
["url", "api_key", "api_version", "model"], model_config, "model"
["url", "auth_token", "api_version", "model"], model_config, "model"
)
self.assertIsNotNone(client)
mock_openai_client.assert_called_once()
Expand Down Expand Up @@ -246,7 +246,7 @@ def test_create_openai_azure_client_multi_url(self, mock_validate, mock_openai_c
model_config = {
"model_type": "azure_openai",
"url": [model_url1, model_url2],
"api_key": [auth_token1, auth_token2],
"auth_token": [auth_token1, auth_token2],
"api_version": "2023-05-15",
"model": "gpt-4",
"timeout": 90,
Expand All @@ -257,7 +257,7 @@ def test_create_openai_azure_client_multi_url(self, mock_validate, mock_openai_c

# Verify the client was created with the right parameters
mock_validate.assert_called_once_with(
["url", "api_key", "api_version", "model"], model_config, "model"
["url", "auth_token", "api_version", "model"], model_config, "model"
)
self.assertIsNotNone(client)
mock_openai_client.assert_called_once()
Expand Down Expand Up @@ -288,7 +288,7 @@ def test_create_openai_azure_client_multi_url_single_auth_token(
model_config = {
"model_type": "azure_openai",
"url": [model_url1, model_url2],
"api_key": auth_token,
"auth_token": auth_token,
"api_version": "2023-05-15",
"model": "gpt-4",
"timeout": 90,
Expand All @@ -299,7 +299,7 @@ def test_create_openai_azure_client_multi_url_single_auth_token(

# Verify the client was created with the right parameters
mock_validate.assert_called_once_with(
["url", "api_key", "api_version", "model"], model_config, "model"
["url", "auth_token", "api_version", "model"], model_config, "model"
)
self.assertIsNotNone(client)
mock_openai_client.assert_called_once()
Expand Down
2 changes: 1 addition & 1 deletion tests/core/models/test_custom_models_completion_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def setUp(self):
"name": "test_openai",
"model_type": "azure_openai",
"url": "http://openai-test.com",
"api_key": "test-key",
"auth_token": "test-key",
"api_version": "2023-05-15",
"model": "gpt-4",
}
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_base_task_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,15 @@ def mock_sygra_config():
def mock_model_config():
return {
"gpt-4o": {
"api_key": "dummy-key",
"auth_token": "dummy-key",
"api_version": "2024-02-15-preview",
"model": "gpt-4o",
"model_type": "azure_openai",
"parameters": {"max_tokens": 500, "temperature": 1.0},
"url": "https://test-url.com/",
},
"gpt4": {
"api_key": "dummy-key",
"auth_token": "dummy-key",
"api_version": "2024-05-01-preview",
"model": "gpt-4-32k",
"model_type": "azure_openai",
Expand Down
29 changes: 28 additions & 1 deletion tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,34 @@ def test_load_model_config_url_list(self, mock_load_yaml):
# Verify model2 has a single URL string
self.assertIsInstance(result["model2"]["url"], str)
self.assertEqual(result["model2"]["url"], "http://api.openai.com/v1/")
self.assertEqual(result["model2"]["api_key"], "test-token-2")
self.assertEqual(result["model2"]["auth_token"], "test-token-2")

@patch.dict(os.environ, {}, clear=True)
@patch("sygra.utils.utils.load_yaml_file")
def test_load_model_config_url_token_list(self, mock_load_yaml):
"""Test that pipe-separated URLs in environment variables are correctly parsed into lists."""
# Mock the base configs loaded from YAML
mock_load_yaml.return_value = {
"model1": {"model_type": "vllm", "parameters": {"temperature": 0.7}},
}

# Set up environment variables with pipe-separated URLs, Tokens
os.environ["SYGRA_MODEL1_URL"] = (
f"http://server1.example.com/v1/{constants.LIST_SEPARATOR}http://server2.example.com/v1/"
)
os.environ["SYGRA_MODEL1_TOKEN"] = f"test-token-1{constants.LIST_SEPARATOR}test-token-2"

# Call the function
result = utils.load_model_config()

# Verify model1 has a list of URLs
self.assertIsInstance(result["model1"]["url"], list)
self.assertEqual(len(result["model1"]["url"]), 2)
self.assertEqual(result["model1"]["url"][0], "http://server1.example.com/v1/")
self.assertEqual(result["model1"]["url"][1], "http://server2.example.com/v1/")
self.assertIsInstance(result["model1"]["auth_token"], list)
self.assertEqual(result["model1"]["auth_token"][0], "test-token-1")
self.assertEqual(result["model1"]["auth_token"][1], "test-token-2")


if __name__ == "__main__":
Expand Down