|
5 | 5 | with health monitoring, failover, and support for wrapping other approaches. |
6 | 6 | """ |
7 | 7 | import logging |
8 | | -from typing import Tuple, Optional |
| 8 | +import threading |
| 9 | +from typing import Tuple, Optional, Dict |
9 | 10 | from optillm.plugins.proxy.config import ProxyConfig |
10 | 11 | from optillm.plugins.proxy.client import ProxyClient |
11 | 12 | from optillm.plugins.proxy.approach_handler import ApproachHandler |
|
21 | 22 | # Global proxy client cache to maintain state between requests |
22 | 23 | _proxy_client_cache = {} |
23 | 24 |
|
# Global cache for system message support per provider-model combination.
# Keyed as "<base_identifier>:<model>" with bool values (True = the model
# accepts a system role). Guarded by _cache_lock — reentrant so a thread
# already holding it can safely re-enter cache lookups — to ensure only one
# probe request is issued per key under concurrent traffic.
_system_message_support_cache: Dict[str, bool] = {}
_cache_lock = threading.RLock()
| 28 | + |
| 29 | +def _test_system_message_support(proxy_client, model: str) -> bool: |
| 30 | + """ |
| 31 | + Test if a model supports system messages by making a minimal test request. |
| 32 | + Returns True if supported, False otherwise. |
| 33 | + """ |
| 34 | + try: |
| 35 | + # Try a minimal system message request |
| 36 | + test_response = proxy_client.chat.completions.create( |
| 37 | + model=model, |
| 38 | + messages=[ |
| 39 | + {"role": "system", "content": "test"}, |
| 40 | + {"role": "user", "content": "hi"} |
| 41 | + ], |
| 42 | + max_tokens=1, # Minimal token generation |
| 43 | + temperature=0 |
| 44 | + ) |
| 45 | + return True |
| 46 | + except Exception as e: |
| 47 | + error_msg = str(e).lower() |
| 48 | + # Check for known system message rejection patterns |
| 49 | + if any(pattern in error_msg for pattern in [ |
| 50 | + "developer instruction", |
| 51 | + "system message", |
| 52 | + "not enabled", |
| 53 | + "not supported" |
| 54 | + ]): |
| 55 | + logger.info(f"Model {model} does not support system messages: {str(e)[:100]}") |
| 56 | + return False |
| 57 | + else: |
| 58 | + # If it's a different error, assume system messages are supported |
| 59 | + # but something else went wrong (rate limit, timeout, etc.) |
| 60 | + logger.debug(f"System message test failed for {model}, assuming supported: {str(e)[:100]}") |
| 61 | + return True |
| 62 | + |
def _get_system_message_support(proxy_client, model: str) -> bool:
    """
    Look up — or lazily determine and memoize — whether *model* accepts
    system messages.

    Results are stored in the module-level cache under a key combining the
    client's base identifier and the model name. The whole check-then-probe
    sequence runs under _cache_lock so concurrent requests for the same key
    trigger at most one probe.
    """
    base = getattr(proxy_client, '_base_identifier', 'default')
    cache_key = f"{base}:{model}"

    with _cache_lock:
        supported = _system_message_support_cache.get(cache_key)
        if supported is None:  # values are strictly bool, so None == miss
            logger.debug(f"Testing system message support for {model}")
            supported = _test_system_message_support(proxy_client, model)
            _system_message_support_cache[cache_key] = supported
        return supported
| 77 | + |
| 78 | +def _format_messages_for_model(system_prompt: str, initial_query: str, |
| 79 | + supports_system_messages: bool) -> list: |
| 80 | + """ |
| 81 | + Format messages based on whether the model supports system messages. |
| 82 | + """ |
| 83 | + if supports_system_messages: |
| 84 | + return [ |
| 85 | + {"role": "system", "content": system_prompt}, |
| 86 | + {"role": "user", "content": initial_query} |
| 87 | + ] |
| 88 | + else: |
| 89 | + # Merge system prompt into user message |
| 90 | + if system_prompt.strip(): |
| 91 | + combined_message = f"{system_prompt}\n\nUser: {initial_query}" |
| 92 | + else: |
| 93 | + combined_message = initial_query |
| 94 | + |
| 95 | + return [{"role": "user", "content": combined_message}] |
| 96 | + |
24 | 97 | def run(system_prompt: str, initial_query: str, client, model: str, |
25 | 98 | request_config: dict = None) -> Tuple[str, int]: |
26 | 99 | """ |
@@ -119,14 +192,21 @@ def run(system_prompt: str, initial_query: str, client, model: str, |
119 | 192 | logger.info(f"Proxy routing approach/plugin: {potential_approach}") |
120 | 193 | return result |
121 | 194 |
|
122 | | - # Direct proxy execution |
| 195 | + # Direct proxy execution with dynamic system message support detection |
123 | 196 | logger.info(f"Direct proxy routing for model: {model}") |
| 197 | + |
| 198 | + # Test and cache system message support for this model |
| 199 | + supports_system_messages = _get_system_message_support(proxy_client, model) |
| 200 | + |
| 201 | + # Format messages based on system message support |
| 202 | + messages = _format_messages_for_model(system_prompt, initial_query, supports_system_messages) |
| 203 | + |
| 204 | + if not supports_system_messages: |
| 205 | + logger.info(f"Using fallback message formatting for {model} (no system message support)") |
| 206 | + |
124 | 207 | response = proxy_client.chat.completions.create( |
125 | 208 | model=model, |
126 | | - messages=[ |
127 | | - {"role": "system", "content": system_prompt}, |
128 | | - {"role": "user", "content": initial_query} |
129 | | - ], |
| 209 | + messages=messages, |
130 | 210 | **(request_config or {}) |
131 | 211 | ) |
132 | 212 |
|
|
0 commit comments