Merged
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -20,7 +20,7 @@ name = "guidellm"
version = "0.1.0"
description = "Guidance platform for deploying and managing large language models."
readme = { file = "README.md", content-type = "text/markdown" }
=3.8">
-requires-python = ">=3.8.0,<4.0"
+requires-python = ">=3.9.0,<4.0"
license = { file = "LICENSE" }
authors = [ { name = "Neuralmagic, Inc." } ]
urls = { homepage = "https://github.com/neuralmagic/guidellm" }
@@ -93,7 +93,7 @@ profile = "black"

[tool.mypy]
files = ["src/guidellm", "tests"]
-python_version = '3.8'
+python_version = '3.9'
warn_redundant_casts = true
warn_unused_ignores = false
show_error_codes = true
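
Note: this version bump is what enables the typing changes in the rest of this PR. PEP 585 built-in generics (`list[str]`, `dict[str, Any]`, `type[...]`) are only subscriptable at runtime on Python 3.9+; on 3.8 they raise `TypeError` at definition time. A minimal illustration, not code from this repo:

```python
# Runs on Python 3.9+; on 3.8 the annotations below fail at definition
# time with "TypeError: 'type' object is not subscriptable".
def word_counts(words: list[str]) -> dict[str, int]:
    counts: dict[str, int] = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    return counts


print(word_counts(["a", "b", "a"]))  # {'a': 2, 'b': 1}
```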
15 changes: 8 additions & 7 deletions src/guidellm/backend/backend.py
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union
+from typing import Any, Literal, Optional, Union

from loguru import logger
from PIL import Image
@@ -28,7 +29,7 @@ class Backend(ABC):
:param type_: The type of the backend.
"""

-_registry: Dict[BackendType, "Type[Backend]"] = {}
+_registry: dict[BackendType, "type[Backend]"] = {}

@classmethod
def register(cls, backend_type: BackendType):
@@ -46,7 +47,7 @@ def register(cls, backend_type: BackendType):
if not issubclass(cls, Backend):
raise TypeError("Only subclasses of Backend can be registered")

-def inner_wrapper(wrapped_class: Type["Backend"]):
+def inner_wrapper(wrapped_class: type["Backend"]):
cls._registry[backend_type] = wrapped_class
logger.info("Registered backend type: {}", backend_type)
return wrapped_class
@@ -103,7 +104,7 @@ def model(self) -> Optional[str]:

@property
@abstractmethod
-def info(self) -> Dict[str, Any]:
+def info(self) -> dict[str, Any]:
"""
:return: The information about the backend.
"""
@@ -146,7 +147,7 @@ async def prepare_multiprocessing(self):
...

@abstractmethod
-async def available_models(self) -> List[str]:
+async def available_models(self) -> list[str]:
"""
Get the list of available models for the backend.

@@ -158,7 +159,7 @@ async def available_models(self) -> list[str]:
@abstractmethod
async def text_completions(
self,
-prompt: Union[str, List[str]],
+prompt: Union[str, list[str]],
request_id: Optional[str] = None,
prompt_token_count: Optional[int] = None,
output_token_count: Optional[int] = None,
@@ -190,7 +191,7 @@ async def chat_completions(
self,
content: Union[
str,
-List[Union[str, Dict[str, Union[str, Dict[str, str]]], Path, Image.Image]],
+list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
Any,
],
request_id: Optional[str] = None,
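
For context on the `type["Backend"]` annotations above: `Backend.register` implements a decorator-based registry. A minimal, self-contained sketch of the pattern (class and argument names below are simplified stand-ins, not guidellm's actual API):

```python
from typing import Optional


class MiniBackend:
    # Maps a backend-type key to the registered subclass.
    _registry: dict[str, type["MiniBackend"]] = {}

    @classmethod
    def register(cls, backend_type: str):
        # Decorator factory: records the decorated subclass under the key.
        def inner_wrapper(wrapped_class: type["MiniBackend"]):
            cls._registry[backend_type] = wrapped_class
            return wrapped_class

        return inner_wrapper

    @classmethod
    def create(cls, backend_type: str, **kwargs) -> "MiniBackend":
        # Look up the registered subclass and instantiate it.
        if backend_type not in cls._registry:
            raise ValueError(f"Unknown backend type: {backend_type}")
        return cls._registry[backend_type](**kwargs)


@MiniBackend.register("openai_http")
class MiniOpenAIBackend(MiniBackend):
    def __init__(self, target: Optional[str] = None):
        self.target = target


backend = MiniBackend.create("openai_http", target="http://localhost:8000")
print(type(backend).__name__)  # MiniOpenAIBackend
```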
33 changes: 17 additions & 16 deletions src/guidellm/backend/openai.py
@@ -1,8 +1,9 @@
import base64
import json
import time
+from collections.abc import AsyncGenerator
from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union

import httpx
from loguru import logger
@@ -111,7 +112,7 @@ def model(self) -> Optional[str]:
return self._model

@property
-def info(self) -> Dict[str, Any]:
+def info(self) -> dict[str, Any]:
"""
:return: The information about the backend.
"""
@@ -157,7 +158,7 @@ async def prepare_multiprocessing(self):
await self._async_client.aclose()
self._async_client = None

-async def available_models(self) -> List[str]:
+async def available_models(self) -> list[str]:
"""
Get the available models for the target server using the OpenAI models endpoint:
/v1/models
@@ -176,7 +177,7 @@ async def available_models(self) -> list[str]:

async def text_completions( # type: ignore[override]
self,
-prompt: Union[str, List[str]],
+prompt: Union[str, list[str]],
request_id: Optional[str] = None,
prompt_token_count: Optional[int] = None,
output_token_count: Optional[int] = None,
@@ -232,7 +233,7 @@ async def chat_completions(  # type: ignore[override]
self,
content: Union[
str,
-List[Union[str, Dict[str, Union[str, Dict[str, str]]], Path, Image.Image]],
+list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
Any,
],
request_id: Optional[str] = None,
@@ -318,7 +319,7 @@ def _get_async_client(self) -> httpx.AsyncClient:

return client

-def _headers(self) -> Dict[str, str]:
+def _headers(self) -> dict[str, str]:
headers = {
"Content-Type": "application/json",
}
@@ -335,8 +336,8 @@ def _headers(self) -> dict[str, str]:
return headers

def _completions_payload(
-self, orig_kwargs: Optional[Dict], max_output_tokens: Optional[int], **kwargs
-) -> Dict:
+self, orig_kwargs: Optional[dict], max_output_tokens: Optional[int], **kwargs
+) -> dict:
payload = orig_kwargs or {}
payload.update(kwargs)
payload["model"] = self.model
@@ -366,10 +367,10 @@ def _create_chat_messages(
def _create_chat_messages(
content: Union[
str,
-List[Union[str, Dict[str, Union[str, Dict[str, str]]], Path, Image.Image]],
+list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
Any,
],
-) -> List[Dict]:
+) -> list[dict]:
if isinstance(content, str):
return [
{
@@ -382,7 +383,7 @@ def _create_chat_messages(
resolved_content = []

for item in content:
-if isinstance(item, Dict):
+if isinstance(item, dict):
resolved_content.append(item)
elif isinstance(item, str):
resolved_content.append({"type": "text", "text": item})
@@ -430,8 +431,8 @@ async def _iterative_completions_request(
request_id: Optional[str],
request_prompt_tokens: Optional[int],
request_output_tokens: Optional[int],
-headers: Dict,
-payload: Dict,
+headers: dict,
+payload: dict,
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
if type_ == "text_completions":
target = f"{self.target}{TEXT_COMPLETIONS_PATH}"
@@ -551,7 +552,7 @@ async def _iterative_completions_request(

@staticmethod
def _extract_completions_delta_content(
type_: Literal["text_completions", "chat_completions"], data: Dict
type_: Literal["text_completions", "chat_completions"], data: dict
) -> Optional[str]:
if "choices" not in data or not data["choices"]:
return None
@@ -566,8 +567,8 @@ def _extract_completions_usage(

@staticmethod
def _extract_completions_usage(
-data: Dict,
-) -> Optional[Dict[Literal["prompt", "output"], int]]:
+data: dict,
+) -> Optional[dict[Literal["prompt", "output"], int]]:
if "usage" not in data or not data["usage"]:
return None

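
Beyond the annotation mechanics, `_extract_completions_delta_content` is the logic this file's diff re-annotates with plain `dict`. A hedged sketch of that kind of extraction, using the public OpenAI streaming shapes (`choices[0].text` for /v1/completions chunks, `choices[0].delta.content` for /v1/chat/completions chunks) rather than guidellm's exact internals:

```python
from typing import Literal, Optional


def extract_delta(
    type_: Literal["text_completions", "chat_completions"], data: dict
) -> Optional[str]:
    # No choices in the chunk means no incremental text to report.
    if "choices" not in data or not data["choices"]:
        return None
    if type_ == "text_completions":
        # Legacy completions stream: text arrives directly on the choice.
        return data["choices"][0].get("text")
    # Chat completions stream: text arrives under the "delta" object.
    return data["choices"][0].get("delta", {}).get("content")


print(extract_delta("text_completions", {"choices": [{"text": "Hello"}]}))
print(extract_delta("chat_completions", {"choices": [{"delta": {"content": "Hi"}}]}))
```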
6 changes: 3 additions & 3 deletions src/guidellm/backend/response.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Literal, Optional
+from typing import Any, Literal, Optional

from pydantic import computed_field

@@ -55,8 +55,8 @@ class RequestArgs(StandardBaseModel):
"""

target: str
-headers: Dict[str, str]
-payload: Dict[str, Any]
+headers: dict[str, str]
+payload: dict[str, Any]
timeout: Optional[float] = None
http2: Optional[bool] = None

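
Worth noting why the runtime floor matters for these models in particular: pydantic resolves annotations at runtime to build validators, so `dict[str, str]` on a model field genuinely requires Python 3.9+ (a `from __future__ import annotations` shim would not help, since pydantic still evaluates the annotation). A simplified stand-in for `RequestArgs`, assuming pydantic v2:

```python
from typing import Any, Optional

from pydantic import BaseModel


class RequestArgsSketch(BaseModel):
    # Built-in generics in field annotations are evaluated at class
    # creation time, hence the >=3.9 requirement.
    target: str
    headers: dict[str, str]
    payload: dict[str, Any]
    timeout: Optional[float] = None


args = RequestArgsSketch(target="http://localhost:8000", headers={}, payload={})
print(args.model_dump())
```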
27 changes: 12 additions & 15 deletions src/guidellm/benchmark/aggregator.py
@@ -3,12 +3,9 @@
from pathlib import Path
from typing import (
Any,
-Dict,
Generic,
-List,
Literal,
Optional,
-Tuple,
TypeVar,
Union,
)
@@ -258,7 +255,7 @@ class BenchmarkAggregator(
),
discriminator="type_",
)
-extras: Dict[str, Any] = Field(
+extras: dict[str, Any] = Field(
description=(
"Any additional information or metadata that was passed for this benchmark."
)
@@ -292,9 +289,9 @@ class BenchmarkAggregator(
default_factory=RequestsRunningStats,
)
results: StatusBreakdown[
-List[SchedulerRequestResult[RequestT, ResponseT]],
-List[SchedulerRequestResult[RequestT, ResponseT]],
-List[SchedulerRequestResult[RequestT, ResponseT]],
+list[SchedulerRequestResult[RequestT, ResponseT]],
+list[SchedulerRequestResult[RequestT, ResponseT]],
+list[SchedulerRequestResult[RequestT, ResponseT]],
None,
] = Field(
description=(
@@ -516,7 +513,7 @@ class GenerativeBenchmarkAggregator(
"avaiable that match the preferred source."
)
)
-processor_args: Optional[Dict[str, Any]] = Field(
+processor_args: Optional[dict[str, Any]] = Field(
description=(
"Additional arguments to pass to the tokenizer if it requires "
"any specific configuration for loading or processing."
@@ -636,12 +633,12 @@ def compile(self) -> GenerativeBenchmark:

def _compile_results(
self,
-) -> Tuple[
-List[GenerativeTextResponseStats],
-List[GenerativeTextErrorStats],
-List[GenerativeTextErrorStats],
+) -> tuple[
+list[GenerativeTextResponseStats],
+list[GenerativeTextErrorStats],
+list[GenerativeTextErrorStats],
]:
-successful: List[GenerativeTextResponseStats] = [
+successful: list[GenerativeTextResponseStats] = [
GenerativeTextResponseStats(
request_id=result.request.request_id,
request_type=result.request.request_type,
@@ -670,7 +667,7 @@ def _compile_results(
for result in self.results.successful
if result.request and result.response
]
-incomplete: List[GenerativeTextErrorStats] = [
+incomplete: list[GenerativeTextErrorStats] = [
GenerativeTextErrorStats(
error=result.response.error or "",
request_id=result.request.request_id,
@@ -700,7 +697,7 @@ def _compile_results(
for result in self.results.incomplete
if result.request and result.response
]
-error: List[GenerativeTextErrorStats] = [
+error: list[GenerativeTextErrorStats] = [
GenerativeTextErrorStats(
error=result.response.error or "",
request_id=result.request.request_id,
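
A minimal reproduction of the `tuple[list, ...]` return shape that `_compile_results` now declares, with plain strings standing in for the stats objects it actually builds:

```python
def split_by_status(
    results: list[tuple[str, str]],
) -> tuple[list[str], list[str], list[str]]:
    # Partition (request_id, status) pairs into three parallel buckets.
    successful: list[str] = []
    incomplete: list[str] = []
    errored: list[str] = []
    for request_id, status in results:
        if status == "successful":
            successful.append(request_id)
        elif status == "incomplete":
            incomplete.append(request_id)
        else:
            errored.append(request_id)
    return successful, incomplete, errored


ok, part, err = split_by_status([("r1", "successful"), ("r2", "error")])
print(ok, part, err)  # ['r1'] [] ['r2']
```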
20 changes: 10 additions & 10 deletions src/guidellm/benchmark/benchmark.py
@@ -1,6 +1,6 @@
import random
import uuid
-from typing import Any, Dict, List, Literal, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, TypeVar, Union

from pydantic import Field, computed_field

@@ -273,7 +273,7 @@ class Benchmark(StandardBaseModel):
"requests for this benchmark."
),
)
-extras: Dict[str, Any] = Field(
+extras: dict[str, Any] = Field(
description=(
"Any additional information or metadata that was passed for this benchmark."
)
@@ -608,9 +608,9 @@ def duration(self) -> float:
default=None,
)
requests: StatusBreakdown[
-List[GenerativeTextResponseStats],
-List[GenerativeTextErrorStats],
-List[GenerativeTextErrorStats],
+list[GenerativeTextResponseStats],
+list[GenerativeTextErrorStats],
+list[GenerativeTextErrorStats],
None,
] = Field(
description=(
@@ -663,14 +663,14 @@ def set_sample_size(self, sample_size: Optional[int]) -> "GenerativeBenchmark":
@staticmethod
def from_stats(
run_id: str,
-successful: List[GenerativeTextResponseStats],
-incomplete: List[GenerativeTextErrorStats],
-errored: List[GenerativeTextErrorStats],
+successful: list[GenerativeTextResponseStats],
+incomplete: list[GenerativeTextErrorStats],
+errored: list[GenerativeTextErrorStats],
args: BenchmarkArgs,
run_stats: BenchmarkRunStats,
worker: GenerativeRequestsWorkerDescription,
requests_loader: GenerativeRequestLoaderDescription,
-extras: Optional[Dict[str, Any]],
+extras: Optional[dict[str, Any]],
) -> "GenerativeBenchmark":
"""
Create a GenerativeBenchmark instance from the given statistics and metadata.
@@ -696,7 +696,7 @@ def from_stats(
populated and calculated
"""
total = successful + incomplete + errored
-total_types: List[Literal["successful", "incomplete", "error"]] = [
+total_types: list[Literal["successful", "incomplete", "error"]] = [
*["successful"] * len(successful), # type: ignore[list-item]
*["incomplete"] * len(incomplete), # type: ignore[list-item]
*["error"] * len(errored), # type: ignore[list-item]
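
The re-annotated `total_types` covers a parallel-lists idiom: a status label is generated for each merged request via list multiplication and unpacking. A small self-contained reproduction (stand-in data, not the real stats objects):

```python
from typing import Literal

successful = ["s1", "s2"]
incomplete = ["i1"]
errored = ["e1"]

# Merge the requests, then build a label list that lines up index-for-index.
total = successful + incomplete + errored
total_types: list[Literal["successful", "incomplete", "error"]] = [
    *["successful"] * len(successful),
    *["incomplete"] * len(incomplete),
    *["error"] * len(errored),
]
print(list(zip(total, total_types)))
# [('s1', 'successful'), ('s2', 'successful'), ('i1', 'incomplete'), ('e1', 'error')]
```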
10 changes: 4 additions & 6 deletions src/guidellm/benchmark/benchmarker.py
@@ -1,13 +1,11 @@
import time
import uuid
from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator, Iterable
from pathlib import Path
from typing import (
Any,
-AsyncGenerator,
-Dict,
Generic,
-Iterable,
Literal,
Optional,
Union,
@@ -136,7 +134,7 @@ def __init__(
worker: RequestsWorker[RequestT, ResponseT],
request_loader: Iterable[RequestT],
requests_loader_description: RequestLoaderDescription,
-benchmark_save_extras: Optional[Dict[str, Any]] = None,
+benchmark_save_extras: Optional[dict[str, Any]] = None,
):
self.worker = worker
self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
@@ -294,9 +292,9 @@ def __init__(
backend: Backend,
request_loader: Iterable[GenerationRequest],
request_loader_description: GenerativeRequestLoaderDescription,
-benchmark_save_extras: Optional[Dict[str, Any]] = None,
+benchmark_save_extras: Optional[dict[str, Any]] = None,
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
-processor_args: Optional[Dict[str, Any]] = None,
+processor_args: Optional[dict[str, Any]] = None,
):
super().__init__(
worker=GenerativeRequestsWorker(backend),
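
One more note on the import moves in this file (and in backend.py and openai.py): since Python 3.9, PEP 585 deprecates the `typing` aliases such as `typing.AsyncGenerator` and `typing.Iterable` in favor of their `collections.abc` counterparts, which are now subscriptable directly. A minimal usage sketch:

```python
import asyncio
from collections.abc import AsyncGenerator, Iterable


async def countdown(start: int) -> AsyncGenerator[int, None]:
    # Async generator annotated with the collections.abc class directly.
    for value in range(start, 0, -1):
        yield value


def total(values: Iterable[int]) -> int:
    return sum(values)


async def main() -> None:
    print([value async for value in countdown(3)])  # [3, 2, 1]
    print(total([1, 2, 3]))  # 6


asyncio.run(main())
```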