diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py index 7ee4776c..25beb1ce 100644 --- a/src/guidellm/backend/openai.py +++ b/src/guidellm/backend/openai.py @@ -49,6 +49,8 @@ class OpenAIHTTPBackend(Backend): If not provided, the default timeout provided from settings is used. :param http2: If True, uses HTTP/2 for requests to the OpenAI server. Defaults to True. + :param follow_redirects: If True, the HTTP client will follow redirect responses. + If not provided, the default value from settings is used. :param max_output_tokens: The maximum number of tokens to request for completions. If not provided, the default maximum tokens provided from settings is used. """ @@ -62,6 +64,7 @@ def __init__( project: Optional[str] = None, timeout: Optional[float] = None, http2: Optional[bool] = True, + follow_redirects: Optional[bool] = None, max_output_tokens: Optional[int] = None, ): super().__init__(type_="openai_http") @@ -88,6 +91,11 @@ def __init__( self.project = project or settings.openai.project self.timeout = timeout if timeout is not None else settings.request_timeout self.http2 = http2 if http2 is not None else settings.request_http2 + self.follow_redirects = ( + follow_redirects + if follow_redirects is not None + else settings.request_follow_redirects + ) self.max_output_tokens = ( max_output_tokens if max_output_tokens is not None @@ -120,6 +128,7 @@ def info(self) -> dict[str, Any]: "max_output_tokens": self.max_output_tokens, "timeout": self.timeout, "http2": self.http2, + "follow_redirects": self.follow_redirects, "authorization": bool(self.authorization), "organization": self.organization, "project": self.project, @@ -319,7 +328,11 @@ def _get_async_client(self) -> httpx.AsyncClient: :return: The async HTTP client. """ if self._async_client is None: - client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout) + client = httpx.AsyncClient( + http2=self.http2, + timeout=self.timeout, + follow_redirects=self.follow_redirects, + ) self._async_client = client else: client = self._async_client @@ -449,12 +462,13 @@ async def _iterative_completions_request( raise ValueError(f"Unsupported type: {type_}") logger.info( - "{} making request: {} to target: {} using http2: {} for " - "timeout: {} with headers: {} and payload: {}", + "{} making request: {} to target: {} using http2: {} following " + "redirects: {} for timeout: {} with headers: {} and payload: {}", self.__class__.__name__, request_id, target, self.http2, + self.follow_redirects, self.timeout, headers, payload, @@ -544,6 +558,7 @@ async def _iterative_completions_request( payload=payload, timeout=self.timeout, http2=self.http2, + follow_redirects=self.follow_redirects, ), start_time=start_time, end_time=iter_time, diff --git a/src/guidellm/backend/response.py b/src/guidellm/backend/response.py index 4875ade5..b4ee66b2 100644 --- a/src/guidellm/backend/response.py +++ b/src/guidellm/backend/response.py @@ -52,6 +52,7 @@ class RequestArgs(StandardBaseModel): content and other configurations. :param timeout: The timeout for the request in seconds, if any. :param http2: Whether HTTP/2 was used for the request, if applicable. + :param follow_redirects: Whether the request should follow redirect responses. """ target: str @@ -59,6 +60,7 @@ class RequestArgs(StandardBaseModel): payload: dict[str, Any] timeout: Optional[float] = None http2: Optional[bool] = None + follow_redirects: Optional[bool] = None class ResponseSummary(StandardBaseModel): diff --git a/src/guidellm/config.py b/src/guidellm/config.py index 9afcea74..ed7e782b 100644 --- a/src/guidellm/config.py +++ b/src/guidellm/config.py @@ -115,6 +115,7 @@ class Settings(BaseSettings): default_sweep_number: int = 10 # HTTP settings + request_follow_redirects: bool = True request_timeout: int = 60 * 5 # 5 minutes request_http2: bool = True diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index e9c3c687..d8663438 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -16,6 +16,7 @@ def test_openai_http_backend_default_initialization(): assert backend.project == settings.openai.project assert backend.timeout == settings.request_timeout assert backend.http2 is True + assert backend.follow_redirects is True assert backend.max_output_tokens == settings.openai.max_output_tokens @@ -29,6 +30,7 @@ def test_openai_http_backend_intialization(): project="test-proj", timeout=10, http2=False, + follow_redirects=False, max_output_tokens=100, ) assert backend.target == "http://test-target" @@ -38,6 +40,7 @@ def test_openai_http_backend_intialization(): assert backend.project == "test-proj" assert backend.timeout == 10 assert backend.http2 is False + assert backend.follow_redirects is False assert backend.max_output_tokens == 100 diff --git a/tests/unit/backend/test_response.py b/tests/unit/backend/test_response.py index dd0c1349..8b251faf 100644 --- a/tests/unit/backend/test_response.py +++ b/tests/unit/backend/test_response.py @@ -80,6 +80,7 @@ def test_request_args_default_initialization(): ) assert args.timeout is None assert args.http2 is None + assert args.follow_redirects is None @pytest.mark.smoke @@ -94,12 +95,14 @@ def test_request_args_initialization(): }, timeout=10.0, http2=True, + follow_redirects=True, ) assert args.target == "http://example.com" assert args.headers == {"Authorization": "Bearer token"} assert args.payload == {"query": "Hello, world!"} assert args.timeout == 10.0 assert args.http2 is True + assert args.follow_redirects is True @pytest.mark.smoke