vllm-project · NJX-njx · Mar 4, 2026 · gemini-code-assist · Mar 4, 2026 · Copilot
diff --git a/src/tests/test_debug_headers.py b/src/tests/test_debug_headers.py
@@ -0,0 +1,119 @@
+# Copyright 2024-2025 The vLLM Production Stack Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for the _build_debug_headers helper function."""
+
+import unittest
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+from unittest.mock import MagicMock
+
+from vllm_router.services.request_service.request import _build_debug_headers
+
+
+@dataclass
+class MockEndpointInfo:  # minimal stand-in for the endpoint objects the tests pass in
+    url: str  # compared with server_url to select the matching endpoint
+    model_names: List[str]
+    Id: str  # capital "I" on purpose: fixtures pass Id=... and request.py reads .Id
+    added_timestamp: float = 0.0
+    model_label: str = ""
+    sleep: bool = False
+    pod_name: Optional[str] = None  # falsy value => no X-Backend-Pod header
+    service_name: Optional[str] = None
+    namespace: Optional[str] = None
+    model_info: Dict = field(default_factory=dict)  # factory avoids a shared dict
+
+
+class TestBuildDebugHeaders(unittest.TestCase):
+    """Exercise _build_debug_headers against three mock endpoints."""
+
+    def setUp(self):
+        self.endpoints = [
+            MockEndpointInfo(
+                url="http://backend-1:8000",
+                model_names=["llama"],
+                Id="ep-1",
+                pod_name="vllm-pod-abc",
+            ),
+            MockEndpointInfo(
+                url="http://backend-2:8000",
+                model_names=["llama"],
+                Id="ep-2",
+                pod_name="vllm-pod-def",
+            ),
+            MockEndpointInfo(
+                url="http://backend-3:8000",
+                model_names=["llama"],
+                Id="ep-3",
+                pod_name=None,  # drives the "no pod header" test below
+            ),
+        ]
+
+    def test_basic_debug_headers(self):
+        """Test that basic debug headers are returned correctly."""
+        headers = _build_debug_headers(
+            "http://backend-1:8000", self.endpoints
+        )
+        self.assertEqual(headers["X-Backend-Server"], "http://backend-1:8000")
+        self.assertEqual(headers["X-Backend-Id"], "ep-1")
+        self.assertEqual(headers["X-Backend-Pod"], "vllm-pod-abc")
+
+    def test_debug_headers_with_router(self):
+        """Test that routing logic name is included when router is provided."""
+        # A bare, dynamically named class suffices: only type(router).__name__ is read.
+        router = type("RoundRobinRouter", (), {})()
+        headers = _build_debug_headers(
+            "http://backend-1:8000", self.endpoints, router=router
+        )
+        self.assertEqual(headers["X-Backend-Server"], "http://backend-1:8000")
+        self.assertEqual(headers["X-Routing-Logic"], "RoundRobinRouter")
+
+    def test_debug_headers_no_pod_name(self):
+        """Test that X-Backend-Pod is omitted when pod_name is None."""
+        headers = _build_debug_headers(
+            "http://backend-3:8000", self.endpoints
+        )
+        self.assertEqual(headers["X-Backend-Server"], "http://backend-3:8000")
+        self.assertEqual(headers["X-Backend-Id"], "ep-3")
+        self.assertNotIn("X-Backend-Pod", headers)
+
+    def test_debug_headers_unknown_server(self):
+        """Test that only X-Backend-Server is set when no endpoint URL matches."""
+        headers = _build_debug_headers(
+            "http://unknown:8000", self.endpoints
+        )
+        self.assertEqual(headers["X-Backend-Server"], "http://unknown:8000")
+        self.assertNotIn("X-Backend-Id", headers)
+        self.assertNotIn("X-Backend-Pod", headers)
+
+    def test_debug_headers_without_router(self):
+        """Test that X-Routing-Logic is omitted when router is None."""
+        headers = _build_debug_headers(
+            "http://backend-1:8000", self.endpoints, router=None
+        )
+        self.assertNotIn("X-Routing-Logic", headers)
+
+    def test_debug_headers_second_endpoint(self):
+        """Test that correct endpoint metadata is returned for second backend."""
+        headers = _build_debug_headers(
+            "http://backend-2:8000", self.endpoints
+        )
+        self.assertEqual(headers["X-Backend-Server"], "http://backend-2:8000")
+        self.assertEqual(headers["X-Backend-Id"], "ep-2")
+        self.assertEqual(headers["X-Backend-Pod"], "vllm-pod-def")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/vllm_router/services/request_service/request.py b/src/vllm_router/services/request_service/request.py
@@ -97,6 +97,43 @@
 }
 
 
+def _build_debug_headers(
+    server_url: str,
+    endpoints: list,
+    router=None,
+) -> dict:
+    """Build debug response headers describing the backend that served a request.
+
+    ``X-Backend-Server`` is always set. ``X-Backend-Id`` (plus ``X-Backend-Pod``
+    when the pod name is truthy) is taken from the first endpoint whose ``url``
+    equals ``server_url``; when nothing matches, neither header is added.
+
+    Args:
+        server_url: The URL of the backend that handled the request.
+        endpoints: Endpoint objects exposing ``url``, ``Id`` and ``pod_name``.
+        router: Optional router instance; its class name fills ``X-Routing-Logic``.
+
+    Returns:
+        A new dict of debug headers to merge into the response headers.
+    """
+    headers = {"X-Backend-Server": server_url}
+
+    # Only the first endpoint with a matching URL contributes metadata.
+    matched = next((ep for ep in endpoints if ep.url == server_url), None)
+    if matched is not None:
+        # ``Id`` (capitalised) is the attribute name read elsewhere in this
+        # module, so a single read of it is the only lookup performed here.
+        headers["X-Backend-Id"] = matched.Id
+        if matched.pod_name:
+            headers["X-Backend-Pod"] = matched.pod_name
+
+    # Report the router's class name as the routing logic in use.
+    if router is not None:
+        headers["X-Routing-Logic"] = type(router).__name__
+
+    return headers
+
+
 # TODO: (Brian) check if request is json beforehand
 async def process_request(
     request: Request,
@@ -484,6 +521,12 @@ async def route_general_request(
                 if key.lower() not in _HEADERS_TO_STRIP_FROM_RESPONSE
             }
             headers_dict["X-Request-Id"] = request_id
+            # Add debug headers with backend information
+            headers_dict.update(
+                _build_debug_headers(
+                    server_url, endpoints, router=request.app.state.router
+                )
+            )
             last_error = None
             break
         except HTTPException:
@@ -651,7 +694,17 @@ async def generate_stream():
     return StreamingResponse(
         generate_stream(),
         media_type="application/json",
-        headers={"X-Request-Id": request_id},
+        headers={
+            "X-Request-Id": request_id,
+            "X-Backend-Server-Prefill": str(
+                request.app.state.prefill_client._base_url
+            ),
+            "X-Backend-Server-Decode": str(
+                request.app.state.decode_client._base_url
+            ),
+            "X-Backend-Type": "disaggregated",
+            "X-Routing-Logic": "DisaggregatedPrefillRouter",
-            "X-Routing-Logic": "DisaggregatedPrefillRouter",
+            "X-Routing-Logic": DisaggregatedPrefillRouter.__name__,
-            "X-Routing-Logic": "DisaggregatedPrefillRouter",
+            "X-Routing-Logic": DisaggregatedPrefillRouter.__name__,
+        },
     )
 
 
@@ -727,10 +780,18 @@ async def route_sleep_wakeup_request(
             elif endpoint == "/wake_up":
                 service_discovery.remove_sleep_label(pod_name)
 
+            response_headers = {
+                "X-Request-Id": request_id,
+                "X-Backend-Server": server_url,
+                "X-Backend-Id": endpoints[0].Id,
+            }
+            if endpoints[0].pod_name:
+                response_headers["X-Backend-Pod"] = endpoints[0].pod_name
+
             return JSONResponse(
                 status_code=response_status,
                 content={"status": "success"},
-                headers={"X-Request-Id": request_id},
+                headers=response_headers,
             )
 
 
@@ -900,6 +961,12 @@ async def proxy_multipart_request(
         }
 
         headers["X-Request-Id"] = request_id
+        # Add debug headers with backend information
+        headers.update(
+            _build_debug_headers(
+                chosen_url, endpoints, router=request.app.state.router
+            )
+        )
 
         return JSONResponse(
             content=response_content,