vllm-project · markurtz · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
@@ -49,6 +49,8 @@ class OpenAIHTTPBackend(Backend):
         If not provided, the default timeout provided from settings is used.
     :param http2: If True, uses HTTP/2 for requests to the OpenAI server.
         Defaults to True.
+    :param follow_redirects: If True, the HTTP client will follow redirect responses.
+        If not provided, the default value from settings is used.
     :param max_output_tokens: The maximum number of tokens to request for completions.
         If not provided, the default maximum tokens provided from settings is used.
     """
@@ -62,6 +64,7 @@ def __init__(
         project: Optional[str] = None,
         timeout: Optional[float] = None,
         http2: Optional[bool] = True,
+        follow_redirects: Optional[bool] = None,
         max_output_tokens: Optional[int] = None,
     ):
         super().__init__(type_="openai_http")
@@ -88,6 +91,11 @@ def __init__(
         self.project = project or settings.openai.project
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
+        self.follow_redirects = (
+            follow_redirects
+            if follow_redirects is not None
+            else settings.request_follow_redirects
+        )
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
@@ -120,6 +128,7 @@ def info(self) -> dict[str, Any]:
             "max_output_tokens": self.max_output_tokens,
             "timeout": self.timeout,
             "http2": self.http2,
+            "follow_redirects": self.follow_redirects,
             "authorization": bool(self.authorization),
             "organization": self.organization,
             "project": self.project,
@@ -319,7 +328,11 @@ def _get_async_client(self) -> httpx.AsyncClient:
         :return: The async HTTP client.
         """
         if self._async_client is None:
-            client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout)
+            client = httpx.AsyncClient(
+                http2=self.http2,
+                timeout=self.timeout,
+                follow_redirects=self.follow_redirects,
+            )
             self._async_client = client
         else:
             client = self._async_client
@@ -449,12 +462,13 @@ async def _iterative_completions_request(
             raise ValueError(f"Unsupported type: {type_}")
 
         logger.info(
-            "{} making request: {} to target: {} using http2: {} for "
-            "timeout: {} with headers: {} and payload: {}",
+            "{} making request: {} to target: {} using http2: {} following "
+            "redirects: {} for timeout: {} with headers: {} and payload: {}",
             self.__class__.__name__,
             request_id,
             target,
             self.http2,
+            self.follow_redirects,
             self.timeout,
             headers,
             payload,
@@ -544,6 +558,7 @@ async def _iterative_completions_request(
                 payload=payload,
                 timeout=self.timeout,
                 http2=self.http2,
+                follow_redirects=self.follow_redirects,
             ),
             start_time=start_time,
             end_time=iter_time,

diff --git a/src/guidellm/backend/response.py b/src/guidellm/backend/response.py
@@ -52,13 +52,15 @@ class RequestArgs(StandardBaseModel):
         content and other configurations.
     :param timeout: The timeout for the request in seconds, if any.
     :param http2: Whether HTTP/2 was used for the request, if applicable.
+    :param follow_redirects: Whether the request should follow redirect responses.
     """
 
     target: str
     headers: dict[str, str]
     payload: dict[str, Any]
     timeout: Optional[float] = None
     http2: Optional[bool] = None
+    follow_redirects: Optional[bool] = None
 
 
 class ResponseSummary(StandardBaseModel):

diff --git a/src/guidellm/config.py b/src/guidellm/config.py
@@ -115,6 +115,7 @@ class Settings(BaseSettings):
     default_sweep_number: int = 10
 
     # HTTP settings
+    request_follow_redirects: bool = True
     request_timeout: int = 60 * 5  # 5 minutes
     request_http2: bool = True
 

diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py
@@ -16,6 +16,7 @@ def test_openai_http_backend_default_initialization():
     assert backend.project == settings.openai.project
     assert backend.timeout == settings.request_timeout
     assert backend.http2 is True
+    assert backend.follow_redirects is True
     assert backend.max_output_tokens == settings.openai.max_output_tokens
 
 
@@ -29,6 +30,7 @@ def test_openai_http_backend_intialization():
         project="test-proj",
         timeout=10,
         http2=False,
+        follow_redirects=False,
         max_output_tokens=100,
     )
     assert backend.target == "http://test-target"
@@ -38,6 +40,7 @@ def test_openai_http_backend_intialization():
     assert backend.project == "test-proj"
     assert backend.timeout == 10
     assert backend.http2 is False
+    assert backend.follow_redirects is False
     assert backend.max_output_tokens == 100
 
 

diff --git a/tests/unit/backend/test_response.py b/tests/unit/backend/test_response.py
@@ -80,6 +80,7 @@ def test_request_args_default_initialization():
     )
     assert args.timeout is None
     assert args.http2 is None
+    assert args.follow_redirects is None
 
 
 @pytest.mark.smoke
@@ -94,12 +95,14 @@ def test_request_args_initialization():
         },
         timeout=10.0,
         http2=True,
+        follow_redirects=True,
     )
     assert args.target == "http://example.com"
     assert args.headers == {"Authorization": "Bearer token"}
     assert args.payload == {"query": "Hello, world!"}
     assert args.timeout == 10.0
     assert args.http2 is True
+    assert args.follow_redirects is True
 
 
 @pytest.mark.smoke