-
Notifications
You must be signed in to change notification settings - Fork 372
feat: add debug response headers with backend information #864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,119 @@ | ||||||||
| # Copyright 2024-2025 The vLLM Production Stack Authors. | ||||||||
| # | ||||||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||
| # you may not use this file except in compliance with the License. | ||||||||
| # You may obtain a copy of the License at | ||||||||
| # | ||||||||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||||||||
| # | ||||||||
| # Unless required by applicable law or agreed to in writing, software | ||||||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||
| # See the License for the specific language governing permissions and | ||||||||
| # limitations under the License. | ||||||||
|
|
||||||||
| """Tests for the _build_debug_headers helper function.""" | ||||||||
|
|
||||||||
| import unittest | ||||||||
| from dataclasses import dataclass, field | ||||||||
| from typing import Dict, List, Optional | ||||||||
| from unittest.mock import MagicMock | ||||||||
|
|
||||||||
| from vllm_router.services.request_service.request import _build_debug_headers | ||||||||
|
|
||||||||
|
|
||||||||
@dataclass
class MockEndpointInfo:
    """Minimal endpoint record used as a fixture for ``_build_debug_headers``.

    The helper under test reads only ``url``, ``Id`` and ``pod_name``; the
    remaining fields presumably mirror the production ``EndpointInfo`` so the
    mock can be passed wherever an endpoint is expected (TODO confirm against
    the real class).
    """

    # --- required fields -------------------------------------------------
    url: str  # backend base URL; compared against the selected server URL
    model_names: List[str]
    Id: str  # capitalised on purpose: the helper accesses ``ep.Id``

    # --- optional fields with defaults -----------------------------------
    added_timestamp: float = 0.0
    model_label: str = ""
    sleep: bool = False
    pod_name: Optional[str] = None  # when falsy, no pod header is produced
    service_name: Optional[str] = None
    namespace: Optional[str] = None
    # default_factory gives every instance its own dict instead of a shared one
    model_info: Dict = field(default_factory=dict)
|
|
||||||||
|
|
||||||||
| class TestBuildDebugHeaders(unittest.TestCase): | ||||||||
| """Test the _build_debug_headers function.""" | ||||||||
|
|
||||||||
| def setUp(self): | ||||||||
| self.endpoints = [ | ||||||||
| MockEndpointInfo( | ||||||||
| url="http://backend-1:8000", | ||||||||
| model_names=["llama"], | ||||||||
| Id="ep-1", | ||||||||
| pod_name="vllm-pod-abc", | ||||||||
| ), | ||||||||
| MockEndpointInfo( | ||||||||
| url="http://backend-2:8000", | ||||||||
| model_names=["llama"], | ||||||||
| Id="ep-2", | ||||||||
| pod_name="vllm-pod-def", | ||||||||
| ), | ||||||||
| MockEndpointInfo( | ||||||||
| url="http://backend-3:8000", | ||||||||
| model_names=["llama"], | ||||||||
| Id="ep-3", | ||||||||
| pod_name=None, | ||||||||
| ), | ||||||||
| ] | ||||||||
|
|
||||||||
| def test_basic_debug_headers(self): | ||||||||
| """Test that basic debug headers are returned correctly.""" | ||||||||
| headers = _build_debug_headers( | ||||||||
| "http://backend-1:8000", self.endpoints | ||||||||
| ) | ||||||||
| self.assertEqual(headers["X-Backend-Server"], "http://backend-1:8000") | ||||||||
| self.assertEqual(headers["X-Backend-Id"], "ep-1") | ||||||||
| self.assertEqual(headers["X-Backend-Pod"], "vllm-pod-abc") | ||||||||
|
|
||||||||
| def test_debug_headers_with_router(self): | ||||||||
| """Test that routing logic name is included when router is provided.""" | ||||||||
| router = MagicMock() | ||||||||
| router.__class__.__name__ = "RoundRobinRouter" | ||||||||
|
Comment on lines
+75
to
+76
|
||||||||
| router = MagicMock() | |
| router.__class__.__name__ = "RoundRobinRouter" | |
| router = type("RoundRobinRouter", (), {})() |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -97,6 +97,43 @@ | |||||
| } | ||||||
|
|
||||||
|
|
||||||
def _build_debug_headers(
    server_url: str,
    endpoints: list,
    router=None,
) -> dict:
    """Build debug response headers with backend information.

    These headers help operators identify which backend processed a request,
    the routing logic used, and other useful debugging information.

    Args:
        server_url: The URL of the backend that handled the request.
        endpoints: The list of EndpointInfo objects considered for routing.
            ``None`` or an empty list means no endpoint metadata is available.
        router: The router instance (optional), used to extract routing logic name.

    Returns:
        A dict of debug headers to merge into the response headers.
        ``X-Backend-Server`` is always present. ``X-Backend-Id`` and
        ``X-Backend-Pod`` are added only when an endpoint whose ``url`` equals
        ``server_url`` is found and the respective attribute is set.
        ``X-Routing-Logic`` is added only when ``router`` is not ``None``.
    """
    headers = {"X-Backend-Server": server_url}

    # Only the first endpoint whose URL matches contributes metadata.
    # ``endpoints or ()`` keeps a missing endpoint list from raising.
    matched = next(
        (ep for ep in endpoints or () if ep.url == server_url),
        None,
    )
    if matched is not None:
        # Header values are expected to be strings, so a missing Id is
        # omitted rather than emitted as None.
        if matched.Id is not None:
            headers["X-Backend-Id"] = str(matched.Id)
        if matched.pod_name:
            headers["X-Backend-Pod"] = matched.pod_name

    # Report the concrete router class so operators can tell which routing
    # strategy produced this response.
    if router is not None:
        headers["X-Routing-Logic"] = type(router).__name__

    return headers
|
Comment on lines
+100
to
+134
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||||||
|
|
||||||
|
|
||||||
| # TODO: (Brian) check if request is json beforehand | ||||||
| async def process_request( | ||||||
| request: Request, | ||||||
|
|
@@ -484,6 +521,12 @@ async def route_general_request( | |||||
| if key.lower() not in _HEADERS_TO_STRIP_FROM_RESPONSE | ||||||
| } | ||||||
| headers_dict["X-Request-Id"] = request_id | ||||||
| # Add debug headers with backend information | ||||||
| headers_dict.update( | ||||||
| _build_debug_headers( | ||||||
| server_url, endpoints, router=request.app.state.router | ||||||
| ) | ||||||
| ) | ||||||
| last_error = None | ||||||
| break | ||||||
| except HTTPException: | ||||||
|
|
@@ -651,7 +694,17 @@ async def generate_stream(): | |||||
| return StreamingResponse( | ||||||
| generate_stream(), | ||||||
| media_type="application/json", | ||||||
| headers={"X-Request-Id": request_id}, | ||||||
| headers={ | ||||||
| "X-Request-Id": request_id, | ||||||
| "X-Backend-Server-Prefill": str( | ||||||
| request.app.state.prefill_client._base_url | ||||||
| ), | ||||||
| "X-Backend-Server-Decode": str( | ||||||
| request.app.state.decode_client._base_url | ||||||
| ), | ||||||
|
Comment on lines
+699
to
+704
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the |
||||||
| "X-Backend-Type": "disaggregated", | ||||||
| "X-Routing-Logic": "DisaggregatedPrefillRouter", | ||||||
|
||||||
| "X-Routing-Logic": "DisaggregatedPrefillRouter", | |
| "X-Routing-Logic": DisaggregatedPrefillRouter.__name__, |
Copilot
AI
Mar 4, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PR description indicates X-Backend-Server is only returned for the General and Multipart paths, but this sleep/wakeup response also includes it. Either update the PR description/header table to match the implementation, or drop this header from the sleep/wakeup path to keep the documented contract accurate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The debug headers for route_sleep_wakeup_request are manually constructed here, duplicating logic that is already encapsulated in the _build_debug_headers helper function. To improve consistency and reduce redundancy, please use the _build_debug_headers function.
For example, you can call _build_debug_headers(server_url, endpoints) and then merge the result into response_headers.
response_headers = {
"X-Request-Id": request_id,
**_build_debug_headers(server_url, endpoints)
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The attribute `Id` in `MockEndpointInfo` is capitalized. According to PEP 8, variable and attribute names should be in `snake_case` (e.g., `id`). While this is a mock object, it's good practice to maintain consistency with Python's naming conventions, especially if `EndpointInfo` (the real class) also uses `Id`.