Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,465 changes: 1,331 additions & 1,134 deletions pylock.toml

Large diffs are not rendered by default.

23 changes: 14 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ include = ["*"]
[tool.pdm]
distribution = true

[[tool.pdm.source]]
name = "torch"
type = "find_links"
#url = "https://download.pytorch.org/whl/cpu/torch_stable.html"
url = "https://download.pytorch.org/whl/cpu/torch/"
include_packages = ["torch"]


# ************************************************
# ********** Project Metadata **********
Expand Down Expand Up @@ -59,23 +66,21 @@ dependencies = [
"protobuf",
"pydantic>=2.11.7",
"pydantic-settings>=2.0.0",
"pydub",
"pyyaml>=6.0.0",
"rich",
"sanic",
"transformers",
"uvloop>=0.18",
"librosa>=0.11.0",
"torch",
]

[project.optional-dependencies]
perf = [
"orjson",
"msgpack",
"msgspec",
"uvloop",
]
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
recommended = [
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
]
dev = [
# build
Expand Down Expand Up @@ -118,7 +123,7 @@ dev = [
]

[dependency-groups]
dev = [ "guidellm[dev]" ]
dev = ["guidellm[dev]"]

[project.urls]
homepage = "https://github.com/vllm-project/guidellm"
Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import logging
import os

from datasets.utils.logging import disable_progress_bar

with (
open(os.devnull, "w") as devnull, # noqa: PTH123
contextlib.redirect_stderr(devnull),
Expand All @@ -19,6 +21,7 @@
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
hf_logging.set_verbosity_error()
logging.getLogger("transformers").setLevel(logging.ERROR)
disable_progress_bar()

from .logger import configure_logger, logger
from .settings import (
Expand Down
99 changes: 83 additions & 16 deletions src/guidellm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from guidellm.benchmark.scenario import (
GenerativeTextScenario,
)
from guidellm.data import GenerativeRequestType
from guidellm.mock_server import MockServer, MockServerConfig
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
Expand Down Expand Up @@ -143,6 +144,7 @@ def benchmark():
@click.option(
"--data",
type=str,
multiple=True,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
"a path to a data file csv, json, jsonl, or txt, "
Expand Down Expand Up @@ -171,12 +173,6 @@ def benchmark():
"For rate-type=synchronous,throughput, this must not be set."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
# Backend configuration
@click.option(
"--backend",
Expand All @@ -197,9 +193,7 @@ def benchmark():
default=None,
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs. Headers can be removed by setting their value to "
"null. For example: "
"""'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
"dict with **kwargs."
),
)
@click.option(
Expand All @@ -212,6 +206,24 @@ def benchmark():
),
)
# Data configuration
@click.option(
"--request-type",
default="chat_completions",
type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
help=(
"The type of request to create for each data sample and send to the backend. "
f"Supported types: {list(get_literal_vals(GenerativeRequestType))}."
),
)
@click.option(
"--request-formatter-kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the request formatter "
"as a dict with **kwargs."
),
)
@click.option(
"--processor",
default=None,
Expand Down Expand Up @@ -240,15 +252,52 @@ def benchmark():
"as a dict with **kwargs."
),
)
@click.option(
"--data-samples",
default=-1,
type=int,
help=(
"The number of samples to use from the dataset. If -1 (default), will use all "
"samples in the dataset and dynamically generate samples. "
"If >1, will precompile that number of items from the dataset configs."
),
)
@click.option(
"--data-column-mappings",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string of column mappings to apply to the dataset to map into request "
"column types."
),
)
@click.option(
"--data-sampler",
default=None,
type=click.Choice(["random"]),
type=click.Choice(["shuffle"]),
help="The data sampler type to use.",
)
@click.option(
"--data-num-workers",
default=1,
type=int,
help="The number of worker processes to use for data loading.",
)
@click.option(
"--dataloader_kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
"Defaults to None"
"A JSON string containing any arguments to pass to the dataloader constructor "
"as a dict with **kwargs."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
# Output configuration
@click.option(
"--output-path",
Expand Down Expand Up @@ -378,16 +427,22 @@ def run(
data,
profile,
rate,
random_seed,
# Backend Configuration
backend,
backend_kwargs,
model,
# Data configuration
request_type,
request_formatter_kwargs,
processor,
processor_args,
data_args,
data_samples,
data_column_mappings,
data_sampler,
data_num_workers,
dataloader_kwargs,
random_seed,
# Output configuration
output_path,
output_formats,
Expand Down Expand Up @@ -415,15 +470,21 @@ def run(
Supports multiple backends, data sources, output formats, and constraint types
for flexible benchmark configuration.
"""
data_request_formatter = (
request_type
if not request_formatter_kwargs
else {"request_type": request_type, **request_formatter_kwargs}
)

if HAS_UVLOOP:
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
asyncio.run(
benchmark_generative_text(
target=target,
data=data,
data=list(data),
# Benchmark configuration
profile=profile,
rate=rate,
random_seed=random_seed,
# Backend configuration
backend=backend,
backend_kwargs=backend_kwargs,
Expand All @@ -432,7 +493,13 @@ def run(
processor=processor,
processor_args=processor_args,
data_args=data_args,
data_samples=data_samples,
data_column_mapper=data_column_mappings,
data_request_formatter=data_request_formatter,
data_sampler=data_sampler,
data_num_workers=data_num_workers,
dataloader_kwargs=dataloader_kwargs,
random_seed=random_seed,
# Output configuration
output_path=output_path,
output_formats=[
Expand All @@ -455,7 +522,7 @@ def run(
add_aggregators={"extras": InjectExtrasAggregator(extras=output_extras)},
warmup=warmup,
cooldown=cooldown,
request_samples=request_samples,
sample_requests=request_samples,
# Constraints configuration
max_seconds=max_seconds,
max_requests=max_requests,
Expand Down
2 changes: 2 additions & 0 deletions src/guidellm/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
GenerationRequest,
GenerationRequestTimings,
GenerationResponse,
GenerationTokenStats,
)
from .openai import OpenAIHTTPBackend

Expand All @@ -22,5 +23,6 @@
"GenerationRequest",
"GenerationRequestTimings",
"GenerationResponse",
"GenerationTokenStats",
"OpenAIHTTPBackend",
]
1 change: 1 addition & 0 deletions src/guidellm/backends/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,6 @@ def requests_limit(self) -> int | None:
async def default_model(self) -> str | None:
    """
    Get the default model name or identifier for generation requests.

    The body here is only a placeholder (``...``).
    NOTE(review): presumably concrete backends provide the actual
    implementation -- confirm against the enclosing class definition.

    :return: The default model name or identifier for generation requests,
        or None if no default model is available.
    """
    ...
Loading