2,465 changes: 1,331 additions & 1,134 deletions pylock.toml

Large diffs are not rendered by default.

25 changes: 15 additions & 10 deletions pyproject.toml
@@ -13,6 +13,13 @@ include = ["*"]
[tool.pdm]
distribution = true

[[tool.pdm.source]]
name = "torch"
type = "find_links"
#url = "https://download.pytorch.org/whl/cpu/torch_stable.html"
url = "https://download.pytorch.org/whl/cpu/torch/"
include_packages = ["torch"]


# ************************************************
# ********** Project Metadata **********
@@ -54,28 +61,26 @@ dependencies = [
"httpx[http2]<1.0.0",
"loguru",
"msgpack",
"numpy",
"numpy<2.0.0",
"pillow",
"protobuf",
"pydantic>=2.11.7",
"pydantic-settings>=2.0.0",
"pydub",
"pyyaml>=6.0.0",
"rich",
"sanic",
"transformers",
"uvloop>=0.18",
"librosa>=0.11.0",
"torch",
]

[project.optional-dependencies]
perf = [
"orjson",
"msgpack",
"msgspec",
"uvloop",
]
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
recommended = [
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
]
dev = [
# build
@@ -118,7 +123,7 @@ dev = [
]

[dependency-groups]
dev = [ "guidellm[dev]" ]
dev = ["guidellm[dev]"]

[project.urls]
homepage = "https://github.com/vllm-project/guidellm"
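Note on the new `[[tool.pdm.source]]` entry above: the `find_links` source type points PDM's resolver at the CPU-only wheel index, and `include_packages` restricts it to `torch` so every other dependency still resolves from PyPI. A quick way to confirm the CPU build actually landed in the environment (a sketch; the version string shown is just an example):

```python
# Verify that the CPU-only torch wheel was installed: CPU builds report
# torch.version.cuda as None and typically carry a "+cpu" local version tag.
import torch

print(torch.__version__)  # e.g. "2.4.1+cpu" (example value)
assert torch.version.cuda is None, "expected a CPU-only torch build"
```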
3 changes: 3 additions & 0 deletions src/guidellm/__init__.py
@@ -7,6 +7,8 @@
import logging
import os

from datasets import config

with (
open(os.devnull, "w") as devnull, # noqa: PTH123
contextlib.redirect_stderr(devnull),
@@ -19,6 +21,7 @@
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
hf_logging.set_verbosity_error()
logging.getLogger("transformers").setLevel(logging.ERROR)
config.USE_AUDIO_DECODE = False

from .logger import configure_logger, logger
from .settings import (
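The pattern used in this file — importing noisy libraries with stderr redirected to `os.devnull`, then tightening their log levels afterwards — generalizes to any dependency that prints at import time. A minimal standalone sketch of the same idea:

```python
import contextlib
import logging
import os

# Swallow anything the library writes to stderr while it is being imported.
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
    import transformers  # noqa: F401  # example of an import-time-noisy library

# Afterwards, keep only errors from its logger.
logging.getLogger("transformers").setLevel(logging.ERROR)
```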
113 changes: 89 additions & 24 deletions src/guidellm/__main__.py
@@ -48,7 +48,6 @@
from guidellm.backends import BackendType
from guidellm.benchmark import (
GenerativeConsoleBenchmarkerProgress,
InjectExtrasAggregator,
ProfileType,
benchmark_generative_text,
reimport_benchmarks_report,
@@ -59,6 +58,7 @@
from guidellm.mock_server import MockServer, MockServerConfig
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
from guidellm.schemas import GenerativeRequestType
from guidellm.settings import print_config
from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
from guidellm.utils import cli as cli_tools
@@ -143,6 +143,7 @@ def benchmark():
@click.option(
"--data",
type=str,
multiple=True,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
"a path to a data file csv, json, jsonl, or txt, "
@@ -171,12 +172,6 @@ def benchmark():
"For rate-type=synchronous,throughput, this must not be set."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
# Backend configuration
@click.option(
"--backend",
@@ -197,9 +192,7 @@
default=None,
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs. Headers can be removed by setting their value to "
"null. For example: "
"""'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
"dict with **kwargs."
),
)
@click.option(
Expand All @@ -212,6 +205,24 @@ def benchmark():
),
)
# Data configuration
@click.option(
"--request-type",
default="chat_completions",
type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
help=(
"The type of request to create for each data sample and send to the backend. "
f"Supported types: {list(get_literal_vals(GenerativeRequestType))}."
),
)
@click.option(
"--request-formatter-kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the request formatter "
"as a dict with **kwargs."
),
)
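These two new options combine downstream into a single `data_request_formatter` argument (see the assignment at the top of `run()` further down in this diff). A sketch of the resulting value, with the kwargs dict as a made-up example:

```python
# Mirrors the combination logic in run(); the kwargs below are hypothetical.
request_type = "chat_completions"
request_formatter_kwargs = {"stream": True}  # assumed example kwargs

data_request_formatter = (
    request_type
    if not request_formatter_kwargs
    else {"request_type": request_type, **request_formatter_kwargs}
)
# -> {"request_type": "chat_completions", "stream": True}
```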
@click.option(
"--processor",
default=None,
@@ -233,22 +244,60 @@
)
@click.option(
"--data-args",
multiple=True,
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
"as a dict with **kwargs."
),
)
@click.option(
"--data-samples",
default=-1,
type=int,
help=(
"The number of samples to use from the dataset. If -1 (default), will use all "
"samples in the dataset and dynamically generate samples. "
"If >1, will precompile that number of items from the dataset configs."
),
)
@click.option(
"--data-column-mappings",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string of column mappings to apply to the dataset to map into request "
"column types."
),
)
@click.option(
"--data-sampler",
default=None,
type=click.Choice(["random"]),
type=click.Choice(["shuffle"]),
help="The data sampler type to use.",
)
@click.option(
"--data-num-workers",
default=1,
type=int,
help="The number of worker processes to use for data loading.",
)
@click.option(
"--dataloader_kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
"Defaults to None"
"A JSON string containing any arguments to pass to the dataloader constructor "
"as a dict with **kwargs."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
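Several of these options share the `cli_tools.parse_json` callback, which turns the raw `--*-kwargs` strings into dicts before they reach `run()`. The real implementation lives in `guidellm.utils`; a minimal Click callback with the same contract might look like this (an assumption for illustration, not the actual code):

```python
import json

import click

def parse_json(ctx, param, value):
    """Parse a JSON string option into a Python object; pass None through."""
    if value is None:
        return None
    try:
        return json.loads(value)
    except json.JSONDecodeError as err:
        raise click.BadParameter(f"{param.name} must be valid JSON: {err}") from err
```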
# Output configuration
@click.option(
"--output-path",
@@ -325,9 +374,9 @@ def benchmark():
),
)
@click.option(
"--request-samples",
"--sample-requests",
"--output-sampling", # legacy alias
"request_samples",
"sample_requests",
type=int,
help=(
"The number of samples for each request status and each benchmark to save "
@@ -378,28 +427,33 @@ def run(
data,
profile,
rate,
random_seed,
# Backend Configuration
backend,
backend_kwargs,
model,
# Data configuration
request_type,
request_formatter_kwargs,
processor,
processor_args,
data_args,
data_samples,
data_column_mappings,
data_sampler,
data_num_workers,
dataloader_kwargs,
random_seed,
# Output configuration
output_path,
output_formats,
# Updates configuration
disable_console_outputs,
disable_progress,
display_scheduler_stats,
# Aggregators configuration
output_extras,
# Benchmarker configuration
sample_requests,
warmup,
cooldown,
request_samples,
# Constraints configuration
max_seconds,
max_requests,
@@ -415,15 +469,21 @@
Supports multiple backends, data sources, output formats, and constraint types
for flexible benchmark configuration.
"""
data_request_formatter = (
request_type
if not request_formatter_kwargs
else {"request_type": request_type, **request_formatter_kwargs}
)

if HAS_UVLOOP:
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
asyncio.run(
benchmark_generative_text(
target=target,
data=data,
data=list(data),
# Benchmark configuration
profile=profile,
rate=rate,
random_seed=random_seed,
# Backend configuration
backend=backend,
backend_kwargs=backend_kwargs,
@@ -432,7 +492,13 @@ def run(
processor=processor,
processor_args=processor_args,
data_args=data_args,
data_samples=data_samples,
data_column_mapper=data_column_mappings,
data_request_formatter=data_request_formatter,
data_sampler=data_sampler,
data_num_workers=data_num_workers,
dataloader_kwargs=dataloader_kwargs,
random_seed=random_seed,
# Output configuration
output_path=output_path,
output_formats=[
@@ -451,11 +517,10 @@ def run(
else None
),
print_updates=not disable_console_outputs,
# Aggregators configuration
add_aggregators={"extras": InjectExtrasAggregator(extras=output_extras)},
# Benchmarker configuration
sample_requests=sample_requests,
warmup=warmup,
cooldown=cooldown,
request_samples=request_samples,
# Constraints configuration
max_seconds=max_seconds,
max_requests=max_requests,
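Putting the reworked signature together, the CLI's `run()` is now a thin wrapper over `benchmark_generative_text`. A programmatic equivalent of a simple invocation, as a sketch (the target URL and data file are hypothetical, and the remaining parameters are assumed to carry workable defaults):

```python
import asyncio

from guidellm.benchmark import benchmark_generative_text

asyncio.run(
    benchmark_generative_text(
        target="http://localhost:8000",             # hypothetical server
        data=["prompts.jsonl"],                     # hypothetical data file; note: a list
        profile="synchronous",
        data_request_formatter="chat_completions",  # plain string when no formatter kwargs
        random_seed=42,
    )
)
```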
20 changes: 12 additions & 8 deletions src/guidellm/backends/__init__.py
@@ -9,18 +9,22 @@
Backend,
BackendType,
)
from .objects import (
GenerationRequest,
GenerationRequestTimings,
GenerationResponse,
)
from .openai import OpenAIHTTPBackend
from .response_handlers import (
AudioResponseHandler,
ChatCompletionsResponseHandler,
GenerationResponseHandler,
GenerationResponseHandlerFactory,
TextCompletionsResponseHandler,
)

__all__ = [
"AudioResponseHandler",
"Backend",
"BackendType",
"GenerationRequest",
"GenerationRequestTimings",
"GenerationResponse",
"ChatCompletionsResponseHandler",
"GenerationResponseHandler",
"GenerationResponseHandlerFactory",
"OpenAIHTTPBackend",
"TextCompletionsResponseHandler",
]
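The net effect of this re-export change: `GenerationRequest`, `GenerationRequestTimings`, and `GenerationResponse` are no longer importable from `guidellm.backends` and now live in `guidellm.schemas` (as the `backend.py` diff below confirms), while the response handlers join the public backends API. Updated imports would look like:

```python
# Schemas moved out of guidellm.backends.objects into guidellm.schemas.
from guidellm.schemas import GenerationRequest, GenerationResponse

# Response handlers are now exported from guidellm.backends.
from guidellm.backends import (
    GenerationResponseHandlerFactory,
    OpenAIHTTPBackend,
)
```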
5 changes: 3 additions & 2 deletions src/guidellm/backends/backend.py
@@ -16,11 +16,11 @@
from abc import abstractmethod
from typing import Literal

from guidellm.backends.objects import (
from guidellm.scheduler import BackendInterface
from guidellm.schemas import (
GenerationRequest,
GenerationResponse,
)
from guidellm.scheduler import BackendInterface
from guidellm.utils import RegistryMixin

__all__ = [
@@ -115,5 +115,6 @@ def requests_limit(self) -> int | None:
async def default_model(self) -> str | None:
"""
:return: The default model name or identifier for generation requests.
None if no default model is available.
"""
...