
Commit b7803bc

Merge pull request #14778 from otaviofbrito/chore/fix-vllm-pasthrough
fix vllm passthrough
2 parents 88f9cad + ffd9117 commit b7803bc

File tree

4 files changed (+164, -16 lines)


litellm/llms/vllm/common_utils.py

Lines changed: 18 additions & 3 deletions

@@ -11,7 +11,21 @@
 
 
 class VLLMError(BaseLLMException):
-    pass
+    def __init__(
+        self,
+        status_code: int,
+        message: str,
+        request: Optional[httpx.Request] = None,
+        response: Optional[httpx.Response] = None,
+        headers: Optional[Union[httpx.Headers, dict]] = None,
+    ):
+        super().__init__(
+            status_code=status_code,
+            message=message,
+            request=request,
+            response=response,
+            headers=headers,
+        )
 
 
 class VLLMModelInfo(BaseLLMModelInfo):

@@ -25,7 +39,8 @@ def validate_environment(
         api_key: Optional[str] = None,
         api_base: Optional[str] = None,
     ) -> dict:
-        """Google AI Studio sends api key in query params"""
+        if api_key is not None:
+            headers["x-api-key"] = api_key
         return headers
 
     @staticmethod

@@ -53,7 +68,7 @@ def get_models(
         endpoint = "/v1/models"
         if api_base is None or api_key is None:
             raise ValueError(
-                "GEMINI_API_BASE or GEMINI_API_KEY is not set. Please set the environment variable, to query Gemini's `/models` endpoint."
+                "VLLM_API_BASE or VLLM_API_KEY is not set. Please set the environment variable, to query VLLM's `/models` endpoint."
             )
 
         url = _add_path_to_api_base(api_base, endpoint)
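
A minimal sketch of what the reworked VLLMError enables (illustrative only; the status code, message, and header values are made up, and the attribute access assumes BaseLLMException stores them like other litellm exceptions):

# Illustrative only: VLLMError now accepts HTTP context instead of being a bare
# `pass` subclass, so callers can surface status codes and headers from vLLM.
from litellm.llms.vllm.common_utils import VLLMError

try:
    # hypothetical failure path; values are made up for the example
    raise VLLMError(
        status_code=401,
        message="vLLM server rejected the request",
        headers={"x-request-id": "abc-123"},
    )
except VLLMError as e:
    print(e.status_code, e.message)  # assumes BaseLLMException exposes these attributes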

litellm/proxy/common_utils/http_parsing_utils.py

Lines changed: 13 additions & 11 deletions

@@ -233,14 +233,16 @@ async def get_request_body(request: Request) -> Dict[str, Any]:
     """
     Read the request body and parse it as JSON.
     """
-    if request.headers.get("content-type") == "application/json":
-        return await _read_request_body(request)
-    elif (
-        request.headers.get("content-type") == "multipart/form-data"
-        or request.headers.get("content-type") == "application/x-www-form-urlencoded"
-    ):
-        return await get_form_data(request)
-    else:
-        raise ValueError(
-            f"Unsupported content type: {request.headers.get('content-type')}"
-        )
+    if request.method == "POST":
+        if request.headers.get("content-type", "") == "application/json":
+            return await _read_request_body(request)
+        elif (
+            "multipart/form-data" in request.headers.get("content-type", "")
+            or "application/x-www-form-urlencoded" in request.headers.get("content-type", "")
+        ):
+            return await get_form_data(request)
+        else:
+            raise ValueError(
+                f"Unsupported content type: {request.headers.get('content-type')}"
+            )
+    return {}
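
The key behavioral change above: non-POST requests now return an empty dict instead of raising, and content-type matching is by substring. A small illustration of why the substring check matters (example boundary value is made up):

# Illustrative: real multipart uploads include a boundary parameter in the
# content-type header, so the old equality check never matched and raised.
content_type = "multipart/form-data; boundary=----WebKitFormBoundary123"  # typical client value
print(content_type == "multipart/form-data")   # False -> old code fell through to ValueError
print("multipart/form-data" in content_type)   # True  -> new code routes to get_form_data()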

litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py

Lines changed: 15 additions & 2 deletions

@@ -108,7 +108,16 @@ async def llm_passthrough_factory_proxy_route(
 
     # Construct the full target URL using httpx
    base_url = httpx.URL(base_target_url)
-    updated_url = base_url.copy_with(path=encoded_endpoint)
+    # Join paths correctly by removing trailing/leading slashes as needed
+    if not base_url.path or base_url.path == "/":
+        # If base URL has no path, just use the new path
+        updated_url = base_url.copy_with(path=encoded_endpoint)
+    else:
+        # Otherwise, combine the paths
+        base_path = base_url.path.rstrip("/")
+        clean_path = encoded_endpoint.lstrip("/")
+        full_path = f"{base_path}/{clean_path}"
+        updated_url = base_url.copy_with(path=full_path)
 
     # Add or update query parameters
     provider_api_key = passthrough_endpoint_router.get_credentials(
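
For context, httpx.URL.copy_with(path=...) replaces the entire path, which is why a base URL that already carries a path (e.g. a vLLM server mounted under /v1) lost it before this change. A quick sketch of the difference (the example URL is made up):

import httpx

base_url = httpx.URL("https://vllm.internal.example.com/v1")  # hypothetical api_base with a path

# Old behavior: copy_with(path=...) swaps out the whole path, dropping "/v1"
print(base_url.copy_with(path="/chat/completions"))
# -> https://vllm.internal.example.com/chat/completions

# New behavior: strip slashes and join, preserving the base path
full_path = f"{base_url.path.rstrip('/')}/{'/chat/completions'.lstrip('/')}"
print(base_url.copy_with(path=full_path))
# -> https://vllm.internal.example.com/v1/chat/completions
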
@@ -130,7 +139,11 @@
     is_streaming_request = False
     # anthropic is streaming when 'stream' = True is in the body
     if request.method == "POST":
-        _request_body = await request.json()
+        if "multipart/form-data" not in request.headers.get("content-type", ""):
+            _request_body = await request.json()
+        else:
+            _request_body = await get_form_data(request)
+
         if _request_body.get("stream"):
             is_streaming_request = True
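
This second hunk matters for multipart passthrough calls (e.g. file uploads), where request.json() would raise. A hedged client-side sketch against a locally running proxy (the URL, route prefix, key, and model name below are assumptions for illustration):

import requests

# Hypothetical call through a LiteLLM proxy on localhost; before this fix the
# passthrough route tried request.json() on this multipart body and failed.
resp = requests.post(
    "http://localhost:4000/vllm/audio/transcriptions",  # assumed passthrough route
    headers={"Authorization": "Bearer sk-1234"},         # assumed proxy key
    data={"model": "my-hosted-vllm-model"},              # assumed model name
    files={"file": ("sample.wav", open("sample.wav", "rb"), "audio/wav")},
)
print(resp.status_code)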

tests/test_litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py

Lines changed: 118 additions & 0 deletions

@@ -19,6 +19,8 @@
     BaseOpenAIPassThroughHandler,
     RouteChecks,
     create_pass_through_route,
+    llm_passthrough_factory_proxy_route,
+    vllm_proxy_route,
     vertex_discovery_proxy_route,
     vertex_proxy_route,
     bedrock_llm_proxy_route,

@@ -914,3 +916,119 @@ async def test_bedrock_llm_proxy_route_regular_model(self):
         # For regular models, model should be just the model ID
         assert call_kwargs["model"] == "anthropic.claude-3-sonnet-20240229-v1:0"
         assert result == "success"
+
+
+class TestLLMPassthroughFactoryProxyRoute:
+    @pytest.mark.asyncio
+    async def test_llm_passthrough_factory_proxy_route_success(self):
+        from litellm.types.utils import LlmProviders
+        mock_request = MagicMock(spec=Request)
+        mock_request.method = "POST"
+        mock_request.json = AsyncMock(return_value={"stream": False})
+        mock_fastapi_response = MagicMock(spec=Response)
+        mock_user_api_key_dict = MagicMock()
+
+        with patch(
+            "litellm.utils.ProviderConfigManager.get_provider_model_info"
+        ) as mock_get_provider, patch(
+            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router.get_credentials"
+        ) as mock_get_creds, patch(
+            "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route"
+        ) as mock_create_route:
+            mock_provider_config = MagicMock()
+            mock_provider_config.get_api_base.return_value = "https://example.com/v1"
+            mock_provider_config.validate_environment.return_value = {
+                "x-api-key": "dummy"
+            }
+            mock_get_provider.return_value = mock_provider_config
+            mock_get_creds.return_value = "dummy"
+
+            mock_endpoint_func = AsyncMock(return_value="success")
+            mock_create_route.return_value = mock_endpoint_func
+
+            result = await llm_passthrough_factory_proxy_route(
+                custom_llm_provider=LlmProviders.VLLM,
+                endpoint="/chat/completions",
+                request=mock_request,
+                fastapi_response=mock_fastapi_response,
+                user_api_key_dict=mock_user_api_key_dict,
+            )
+
+            assert result == "success"
+            mock_get_provider.assert_called_once_with(
+                provider=litellm.LlmProviders(LlmProviders.VLLM), model=None
+            )
+            mock_get_creds.assert_called_once_with(
+                custom_llm_provider=LlmProviders.VLLM, region_name=None
+            )
+            mock_create_route.assert_called_once_with(
+                endpoint="/chat/completions",
+                target="https://example.com/v1/chat/completions",
+                custom_headers={"x-api-key": "dummy"},
+            )
+            mock_endpoint_func.assert_awaited_once()
+
+
+class TestVLLMProxyRoute:
+    @pytest.mark.asyncio
+    @patch(
+        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.get_request_body",
+        return_value={"model": "router-model", "stream": False},
+    )
+    @patch(
+        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.is_passthrough_request_using_router_model",
+        return_value=True,
+    )
+    @patch("litellm.proxy.proxy_server.llm_router")
+    async def test_vllm_proxy_route_with_router_model(
+        self, mock_llm_router, mock_is_router, mock_get_body
+    ):
+        mock_request = MagicMock(spec=Request)
+        mock_request.method = "POST"
+        mock_request.headers = {"content-type": "application/json"}
+        mock_request.query_params = {}
+        mock_fastapi_response = MagicMock(spec=Response)
+        mock_user_api_key_dict = MagicMock()
+        mock_llm_router.allm_passthrough_route = AsyncMock(
+            return_value=httpx.Response(200, json={"response": "success"})
+        )
+
+        await vllm_proxy_route(
+            endpoint="/chat/completions",
+            request=mock_request,
+            fastapi_response=mock_fastapi_response,
+            user_api_key_dict=mock_user_api_key_dict,
+        )
+
+        mock_is_router.assert_called_once()
+        mock_llm_router.allm_passthrough_route.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    @patch(
+        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.get_request_body",
+        return_value={"model": "other-model"},
+    )
+    @patch(
+        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.is_passthrough_request_using_router_model",
+        return_value=False,
+    )
+    @patch(
+        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.llm_passthrough_factory_proxy_route"
+    )
+    async def test_vllm_proxy_route_fallback_to_factory(
+        self, mock_factory_route, mock_is_router, mock_get_body
+    ):
+        mock_request = MagicMock(spec=Request)
+        mock_fastapi_response = MagicMock(spec=Response)
+        mock_user_api_key_dict = MagicMock()
+        mock_factory_route.return_value = "factory_success"
+
+        result = await vllm_proxy_route(
+            endpoint="/chat/completions",
+            request=mock_request,
+            fastapi_response=mock_fastapi_response,
+            user_api_key_dict=mock_user_api_key_dict,
+        )
+
+        assert result == "factory_success"
+        mock_factory_route.assert_awaited_once()
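
The new tests should be runnable in isolation with pytest's -k filter, e.g. pytest tests/test_litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py -k "TestLLMPassthroughFactoryProxyRoute or TestVLLMProxyRoute" (path taken from the diff; the filter simply names the two new test classes).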
