Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/guidellm/backend/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class OpenAIHTTPBackend(Backend):
If not provided, the default timeout provided from settings is used.
:param http2: If True, uses HTTP/2 for requests to the OpenAI server.
Defaults to True.
:param follow_redirects: If True, the HTTP client will follow redirect responses.
If not provided, the default value from settings is used.
:param max_output_tokens: The maximum number of tokens to request for completions.
If not provided, the default maximum tokens provided from settings is used.
"""
Expand All @@ -62,6 +64,7 @@ def __init__(
project: Optional[str] = None,
timeout: Optional[float] = None,
http2: Optional[bool] = True,
follow_redirects: Optional[bool] = None,
max_output_tokens: Optional[int] = None,
):
super().__init__(type_="openai_http")
Expand All @@ -88,6 +91,11 @@ def __init__(
self.project = project or settings.openai.project
self.timeout = timeout if timeout is not None else settings.request_timeout
self.http2 = http2 if http2 is not None else settings.request_http2
self.follow_redirects = (
follow_redirects
if follow_redirects is not None
else settings.request_follow_redirects
)
self.max_output_tokens = (
max_output_tokens
if max_output_tokens is not None
Expand Down Expand Up @@ -120,6 +128,7 @@ def info(self) -> dict[str, Any]:
"max_output_tokens": self.max_output_tokens,
"timeout": self.timeout,
"http2": self.http2,
"follow_redirects": self.follow_redirects,
"authorization": bool(self.authorization),
"organization": self.organization,
"project": self.project,
Expand Down Expand Up @@ -319,7 +328,11 @@ def _get_async_client(self) -> httpx.AsyncClient:
:return: The async HTTP client.
"""
if self._async_client is None:
client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout)
client = httpx.AsyncClient(
http2=self.http2,
timeout=self.timeout,
follow_redirects=self.follow_redirects,
)
self._async_client = client
else:
client = self._async_client
Expand Down Expand Up @@ -449,12 +462,13 @@ async def _iterative_completions_request(
raise ValueError(f"Unsupported type: {type_}")

logger.info(
"{} making request: {} to target: {} using http2: {} for "
"timeout: {} with headers: {} and payload: {}",
"{} making request: {} to target: {} using http2: {} following "
"redirects: {} for timeout: {} with headers: {} and payload: {}",
self.__class__.__name__,
request_id,
target,
self.http2,
self.follow_redirects,
self.timeout,
headers,
payload,
Expand Down Expand Up @@ -544,6 +558,7 @@ async def _iterative_completions_request(
payload=payload,
timeout=self.timeout,
http2=self.http2,
follow_redirects=self.follow_redirects,
),
start_time=start_time,
end_time=iter_time,
Expand Down
2 changes: 2 additions & 0 deletions src/guidellm/backend/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,15 @@ class RequestArgs(StandardBaseModel):
content and other configurations.
:param timeout: The timeout for the request in seconds, if any.
:param http2: Whether HTTP/2 was used for the request, if applicable.
:param follow_redirects: Whether following redirect responses was enabled for the
request, if applicable.
"""

target: str
headers: dict[str, str]
payload: dict[str, Any]
timeout: Optional[float] = None
http2: Optional[bool] = None
follow_redirects: Optional[bool] = None


class ResponseSummary(StandardBaseModel):
Expand Down
1 change: 1 addition & 0 deletions src/guidellm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class Settings(BaseSettings):
default_sweep_number: int = 10

# HTTP settings
request_follow_redirects: bool = True
request_timeout: int = 60 * 5 # 5 minutes
request_http2: bool = True

Expand Down
3 changes: 3 additions & 0 deletions tests/unit/backend/test_openai_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_openai_http_backend_default_initialization():
assert backend.project == settings.openai.project
assert backend.timeout == settings.request_timeout
assert backend.http2 is True
assert backend.follow_redirects is True
assert backend.max_output_tokens == settings.openai.max_output_tokens


Expand All @@ -29,6 +30,7 @@ def test_openai_http_backend_intialization():
project="test-proj",
timeout=10,
http2=False,
follow_redirects=False,
max_output_tokens=100,
)
assert backend.target == "http://test-target"
Expand All @@ -38,6 +40,7 @@ def test_openai_http_backend_intialization():
assert backend.project == "test-proj"
assert backend.timeout == 10
assert backend.http2 is False
assert backend.follow_redirects is False
assert backend.max_output_tokens == 100


Expand Down
3 changes: 3 additions & 0 deletions tests/unit/backend/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def test_request_args_default_initialization():
)
assert args.timeout is None
assert args.http2 is None
assert args.follow_redirects is None


@pytest.mark.smoke
Expand All @@ -94,12 +95,14 @@ def test_request_args_initialization():
},
timeout=10.0,
http2=True,
follow_redirects=True,
)
assert args.target == "http://example.com"
assert args.headers == {"Authorization": "Bearer token"}
assert args.payload == {"query": "Hello, world!"}
assert args.timeout == 10.0
assert args.http2 is True
assert args.follow_redirects is True


@pytest.mark.smoke
Expand Down
Loading