Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions src/tests/test_debug_headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Copyright 2024-2025 The vLLM Production Stack Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the _build_debug_headers helper function."""

import unittest
from dataclasses import dataclass, field
from typing import Dict, List, Optional
from unittest.mock import MagicMock

from vllm_router.services.request_service.request import _build_debug_headers


@dataclass
class MockEndpointInfo:
url: str
model_names: List[str]
Id: str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The attribute Id in MockEndpointInfo is capitalized. According to PEP 8, variable and attribute names should be in snake_case (e.g., id). While this is a mock object, it's good practice to maintain consistency with Python's naming conventions, especially if EndpointInfo (the real class) also uses Id.

Suggested change
Id: str
id: str

added_timestamp: float = 0.0
model_label: str = ""
sleep: bool = False
pod_name: Optional[str] = None
service_name: Optional[str] = None
namespace: Optional[str] = None
model_info: Dict = field(default_factory=dict)


class TestBuildDebugHeaders(unittest.TestCase):
"""Test the _build_debug_headers function."""

def setUp(self):
self.endpoints = [
MockEndpointInfo(
url="http://backend-1:8000",
model_names=["llama"],
Id="ep-1",
pod_name="vllm-pod-abc",
),
MockEndpointInfo(
url="http://backend-2:8000",
model_names=["llama"],
Id="ep-2",
pod_name="vllm-pod-def",
),
MockEndpointInfo(
url="http://backend-3:8000",
model_names=["llama"],
Id="ep-3",
pod_name=None,
),
]

def test_basic_debug_headers(self):
"""Test that basic debug headers are returned correctly."""
headers = _build_debug_headers(
"http://backend-1:8000", self.endpoints
)
self.assertEqual(headers["X-Backend-Server"], "http://backend-1:8000")
self.assertEqual(headers["X-Backend-Id"], "ep-1")
self.assertEqual(headers["X-Backend-Pod"], "vllm-pod-abc")

def test_debug_headers_with_router(self):
"""Test that routing logic name is included when router is provided."""
router = MagicMock()
router.__class__.__name__ = "RoundRobinRouter"
Comment on lines +75 to +76
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test changes router.__class__.__name__ on a MagicMock, which mutates the MagicMock class name globally and can create confusing cross-test side effects. Prefer using a small dummy router class/instance whose type name is already the desired value (or type('RoundRobinRouter', (), {})()) so the test remains isolated.

Suggested change
router = MagicMock()
router.__class__.__name__ = "RoundRobinRouter"
router = type("RoundRobinRouter", (), {})()

Copilot uses AI. Check for mistakes.
headers = _build_debug_headers(
"http://backend-1:8000", self.endpoints, router=router
)
self.assertEqual(headers["X-Backend-Server"], "http://backend-1:8000")
self.assertEqual(headers["X-Routing-Logic"], "RoundRobinRouter")

def test_debug_headers_no_pod_name(self):
"""Test that X-Backend-Pod is omitted when pod_name is None."""
headers = _build_debug_headers(
"http://backend-3:8000", self.endpoints
)
self.assertEqual(headers["X-Backend-Server"], "http://backend-3:8000")
self.assertEqual(headers["X-Backend-Id"], "ep-3")
self.assertNotIn("X-Backend-Pod", headers)

def test_debug_headers_unknown_server(self):
"""Test behavior when server_url doesn't match any endpoint."""
headers = _build_debug_headers(
"http://unknown:8000", self.endpoints
)
self.assertEqual(headers["X-Backend-Server"], "http://unknown:8000")
self.assertNotIn("X-Backend-Id", headers)
self.assertNotIn("X-Backend-Pod", headers)

def test_debug_headers_without_router(self):
"""Test that X-Routing-Logic is omitted when router is None."""
headers = _build_debug_headers(
"http://backend-1:8000", self.endpoints, router=None
)
self.assertNotIn("X-Routing-Logic", headers)

def test_debug_headers_second_endpoint(self):
"""Test that correct endpoint metadata is returned for second backend."""
headers = _build_debug_headers(
"http://backend-2:8000", self.endpoints
)
self.assertEqual(headers["X-Backend-Server"], "http://backend-2:8000")
self.assertEqual(headers["X-Backend-Id"], "ep-2")
self.assertEqual(headers["X-Backend-Pod"], "vllm-pod-def")


if __name__ == "__main__":
unittest.main()
71 changes: 69 additions & 2 deletions src/vllm_router/services/request_service/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,43 @@
}


def _build_debug_headers(
server_url: str,
endpoints: list,
router=None,
) -> dict:
"""Build debug response headers with backend information.

These headers help operators identify which backend processed a request,
the routing logic used, and other useful debugging information.

Args:
server_url: The URL of the backend that handled the request.
endpoints: The list of EndpointInfo objects considered for routing.
router: The router instance (optional), used to extract routing logic name.

Returns:
A dict of debug headers to merge into the response headers.
"""
headers = {
"X-Backend-Server": server_url,
}

# Find the endpoint that was used and add its metadata
for ep in endpoints:
if ep.url == server_url:
headers["X-Backend-Id"] = ep.Id
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The attribute ep.Id is capitalized. In Python, attribute names typically follow snake_case (e.g., ep.id) as per PEP 8. Please ensure consistency with the EndpointInfo class definition.

Suggested change
headers["X-Backend-Id"] = ep.Id
headers["X-Backend-Id"] = ep.id

if ep.pod_name:
headers["X-Backend-Pod"] = ep.pod_name
break

# Add routing logic type
if router is not None:
headers["X-Routing-Logic"] = type(router).__name__

Comment on lines +118 to +133
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These debug headers expose internal backend URLs and (optionally) pod names to every client. That can leak infrastructure details and potentially credentials if a backend URL ever contains userinfo. Consider gating this behind an explicit opt-in (e.g., a feature gate like DebugHeaders, an env var, or a request header restricted to trusted callers) and/or redacting sensitive URL components before returning them.

Copilot uses AI. Check for mistakes.
return headers
Comment on lines +100 to +134
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The _build_debug_headers function and its usage introduce several new HTTP response headers (X-Backend-Server, X-Backend-Id, X-Backend-Pod, X-Routing-Logic) that disclose internal infrastructure details, such as backend URLs and pod names. These headers are added unconditionally to all responses, which constitutes an information disclosure vulnerability. This information should be restricted to authorized users or controlled by a configuration flag (e.g., --enable-debug-headers) and disabled by default in production environments. Additionally, for better static analysis, the type hint list for endpoints on line 102 should be more precise, e.g., List[EndpointInfo].



# TODO: (Brian) check if request is json beforehand
async def process_request(
request: Request,
Expand Down Expand Up @@ -484,6 +521,12 @@ async def route_general_request(
if key.lower() not in _HEADERS_TO_STRIP_FROM_RESPONSE
}
headers_dict["X-Request-Id"] = request_id
# Add debug headers with backend information
headers_dict.update(
_build_debug_headers(
server_url, endpoints, router=request.app.state.router
)
)
last_error = None
break
except HTTPException:
Expand Down Expand Up @@ -651,7 +694,17 @@ async def generate_stream():
return StreamingResponse(
generate_stream(),
media_type="application/json",
headers={"X-Request-Id": request_id},
headers={
"X-Request-Id": request_id,
"X-Backend-Server-Prefill": str(
request.app.state.prefill_client._base_url
),
"X-Backend-Server-Decode": str(
request.app.state.decode_client._base_url
),
Comment on lines +699 to +704
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

In the route_disaggregated_prefill_request function, internal URLs for prefill and decode backends are directly exposed in the X-Backend-Server-Prefill and X-Backend-Server-Decode response headers. This reveals internal network topology to the client, constituting an information disclosure vulnerability. This information should be restricted to authorized users or controlled by a configuration flag. Additionally, these debug headers are manually constructed here, bypassing the _build_debug_headers helper function, which leads to code duplication and potential inconsistencies. It's recommended to reuse _build_debug_headers to maintain a single source of truth for debug header generation, and avoid direct access to internal implementation details like _base_url.

"X-Backend-Type": "disaggregated",
"X-Routing-Logic": "DisaggregatedPrefillRouter",
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

X-Routing-Logic is hardcoded here, while other paths derive it from the router instance (type(router).__name__). This risks drifting if the class is renamed or the routing logic changes. Prefer deriving it dynamically (or reusing _build_debug_headers / a shared helper) for consistency.

Suggested change
"X-Routing-Logic": "DisaggregatedPrefillRouter",
"X-Routing-Logic": DisaggregatedPrefillRouter.__name__,

Copilot uses AI. Check for mistakes.
},
)


Expand Down Expand Up @@ -727,10 +780,18 @@ async def route_sleep_wakeup_request(
elif endpoint == "/wake_up":
service_discovery.remove_sleep_label(pod_name)

response_headers = {
"X-Request-Id": request_id,
"X-Backend-Server": server_url,
"X-Backend-Id": endpoints[0].Id,
Comment on lines +783 to +786
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description indicates X-Backend-Server is only returned for the General and Multipart paths, but this sleep/wakeup response also includes it. Either update the PR description/header table to match the implementation, or drop this header from the sleep/wakeup path to keep the documented contract accurate.

Copilot uses AI. Check for mistakes.
}
if endpoints[0].pod_name:
response_headers["X-Backend-Pod"] = endpoints[0].pod_name
Comment on lines +783 to +789
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The debug headers for route_sleep_wakeup_request are manually constructed here, duplicating logic that is already encapsulated in the _build_debug_headers helper function. To improve consistency and reduce redundancy, please use the _build_debug_headers function.

For example, you can call _build_debug_headers(server_url, endpoints) and then merge the result into response_headers.

            response_headers = {
                "X-Request-Id": request_id,
                **_build_debug_headers(server_url, endpoints)
            }


return JSONResponse(
status_code=response_status,
content={"status": "success"},
headers={"X-Request-Id": request_id},
headers=response_headers,
)


Expand Down Expand Up @@ -900,6 +961,12 @@ async def proxy_multipart_request(
}

headers["X-Request-Id"] = request_id
# Add debug headers with backend information
headers.update(
_build_debug_headers(
chosen_url, endpoints, router=request.app.state.router
)
)

return JSONResponse(
content=response_content,
Expand Down
Loading