Skip to content

Commit 828ce8c

Browse files
committed
feat: add ability to specify params to drop
Some LLMs don't support all params that clients could send. This MR gives the administrator the ability to filter those out, similar to https://docs.litellm.ai/docs/completion/drop_params. Signed-off-by: Max Wittig <[email protected]>
1 parent 6b0a04a commit 828ce8c

File tree

6 files changed

+97
-92
lines changed

6 files changed

+97
-92
lines changed

src/vllm_router/app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ def initialize_all(app: FastAPI, args):
199199
if args.callbacks:
200200
configure_custom_callbacks(args.callbacks, app)
201201

202+
app.state.drop_params = parse_comma_separated_args(args.drop_params)
203+
202204
initialize_routing_logic(
203205
args.routing_logic,
204206
session_key=args.session_key,

src/vllm_router/parsers/parser.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,16 @@ def parse_args():
240240
help="The request rewriter to use. Default is 'noop' (no rewriting).",
241241
)
242242

243+
# Drop params arguments
244+
parser.add_argument(
245+
"--drop-params",
246+
type=str,
247+
default=None,
248+
help="Comma-separated list of OpenAI parameters to drop from requests. "
249+
"This allows dropping unsupported parameters by your LLM provider. "
250+
"Example: 'frequency_penalty,logit_bias'",
251+
)
252+
243253
# Batch API
244254
# TODO(gaocegege): Make these batch api related arguments to a separate config.
245255
parser.add_argument(

src/vllm_router/services/request_service/request.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,13 @@ async def route_general_request(
203203
status_code=400, detail="Request body is not JSON parsable."
204204
)
205205

206+
if hasattr(request.app.state, "drop_params") and request.app.state.drop_params:
207+
for param in request.app.state.drop_params:
208+
request_json.pop(param, None)
209+
logger.debug(f"Dropped param {param} from request")
210+
request_body = json.dumps(request_json)
211+
update_content_length(request, request_body)
212+
206213
service_discovery = get_service_discovery()
207214
endpoints = service_discovery.get_endpoint_info()
208215

tests/e2e/run-static-discovery-routing-test.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ start_router() {
9999
--decode-model-labels "decode" \
100100
--static-model-labels "prefill,decode" \
101101
--session-key "$SESSION_KEY" \
102-
--routing-logic "$routing_logic" > "$log_file" 2>&1 &
102+
--routing-logic "$routing_logic" \
103+
--drop-params "test-param-to-drop" > "$log_file" 2>&1 &
103104

104105
ROUTER_PID=$!
105106
print_status "Router started with PID: $ROUTER_PID"

tests/e2e/test-routing.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,20 @@ def _save_routing_lines(
157157
)
158158
return False
159159

160-
def send_request(self, request_id: str, prompt: str) -> bool:
160+
def send_request(
161+
self, request_id: str, prompt: str, custom_payload: dict = None
162+
) -> bool:
161163
"""Send a single request and track which endpoint it goes to"""
162164
try:
163-
payload = {
164-
"model": self.model,
165-
"prompt": prompt,
166-
"temperature": 0.7,
167-
"max_tokens": 10,
168-
}
165+
if custom_payload is not None:
166+
payload = custom_payload
167+
else:
168+
payload = {
169+
"model": self.model,
170+
"prompt": prompt,
171+
"temperature": 0.7,
172+
"max_tokens": 10,
173+
}
169174

170175
headers = {
171176
"Content-Type": "application/json",
@@ -542,6 +547,43 @@ def test_chat_completions(self) -> bool:
542547
print_error(f"❌ Chat completions failed: {e} payload: {payload}")
543548
return False
544549

550+
def test_drop_params(self) -> bool:
551+
"""Test that the router drops specified parameters from requests"""
552+
print_status("🧪 Testing drop_params functionality")
553+
554+
# Send a request with parameters that should be dropped
555+
try:
556+
# Use the existing send_request method with custom payload to test drop_params
557+
custom_payload = {
558+
"model": self.model,
559+
"prompt": "Test prompt with parameters to drop",
560+
"temperature": 0.7,
561+
"max_tokens": 10,
562+
"test-param-to-drop": 0.5, # This should be dropped
563+
}
564+
565+
# Send request using existing method with custom payload
566+
if not self.send_request(
567+
"test-drop-params-request",
568+
"Test prompt with parameters to drop",
569+
custom_payload,
570+
):
571+
print_error("❌ Drop params test request failed")
572+
return False
573+
574+
# Check router logs for evidence that parameters were dropped
575+
content = self._read_log_file()
576+
if content is not None:
577+
if "Dropped param test-param-to-drop from request" in str(content):
578+
print_status("✅ Drop params test request completed successfully")
579+
return True
580+
581+
print_error("❌ Drop params test request failed")
582+
return False
583+
except Exception as e:
584+
print_error(f"❌ Unexpected error in drop params test: {e}")
585+
return False
586+
545587
def run_test(self) -> bool:
546588
"""Run the complete routing test"""
547589
try:
@@ -559,6 +601,10 @@ def run_test(self) -> bool:
559601
if not self.test_chat_completions():
560602
return False
561603

604+
# Test drop_params functionality
605+
if not self.test_drop_params():
606+
return False
607+
562608
# Test routing logic
563609
test_runners = {
564610
"roundrobin": self.test_roundrobin_routing,

0 commit comments

Comments
 (0)