vllm-project · markurtz · Oct 1, 2025 · Oct 3, 2025 · Oct 7, 2025
diff --git a/pylock.toml b/pylock.toml
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,6 +13,13 @@ include = ["*"]
 [tool.pdm]
 distribution = true
 
+[[tool.pdm.source]]
+name = "torch"
+type = "find_links"
+# Alternative find-links page (not used): https://download.pytorch.org/whl/cpu/torch_stable.html
+url = "https://download.pytorch.org/whl/cpu/torch/"
+include_packages = ["torch"]
+
 
 # ************************************************
 # ********** Project Metadata **********
@@ -59,23 +66,21 @@ dependencies = [
     "protobuf",
     "pydantic>=2.11.7",
     "pydantic-settings>=2.0.0",
+    "pydub",
     "pyyaml>=6.0.0",
     "rich",
     "sanic",
     "transformers",
     "uvloop>=0.18",
+    "librosa>=0.11.0",
+    "torch",
 ]
 
 [project.optional-dependencies]
-perf = [
-    "orjson",
-    "msgpack",
-    "msgspec",
-    "uvloop",
-]
+perf = ["orjson", "msgpack", "msgspec", "uvloop"]
 recommended = [
-    "tiktoken>=0.11.0",  # For OpenAI tokenizer
-    "blobfile>=3.1.0",   # For OpenAI tokenizer
+    "tiktoken>=0.11.0", # For OpenAI tokenizer
+    "blobfile>=3.1.0",  # For OpenAI tokenizer
 ]
 dev = [
     # build
@@ -118,7 +123,7 @@ dev = [
 ]
 
 [dependency-groups]
-dev = [ "guidellm[dev]" ]
+dev = ["guidellm[dev]"]
 
 [project.urls]
 homepage = "https://github.com/vllm-project/guidellm"

diff --git a/src/guidellm/__init__.py b/src/guidellm/__init__.py
@@ -7,6 +7,8 @@
 import logging
 import os
 
+from datasets.utils.logging import disable_progress_bar
+
 with (
     open(os.devnull, "w") as devnull,  # noqa: PTH123
     contextlib.redirect_stderr(devnull),
@@ -19,6 +21,7 @@
     os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Silence warnings for tokenizers
     hf_logging.set_verbosity_error()
     logging.getLogger("transformers").setLevel(logging.ERROR)
+    disable_progress_bar()
 
 from .logger import configure_logger, logger
 from .settings import (

diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -56,6 +56,7 @@
 from guidellm.benchmark.scenario import (
     GenerativeTextScenario,
 )
+from guidellm.data import GenerativeRequestType
 from guidellm.mock_server import MockServer, MockServerConfig
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
@@ -143,6 +144,7 @@ def benchmark():
 @click.option(
     "--data",
     type=str,
+    multiple=True,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
         "a path to a data file csv, json, jsonl, or txt, "
@@ -171,12 +173,6 @@ def benchmark():
         "For rate-type=synchronous,throughput, this must not be set."
     ),
 )
-@click.option(
-    "--random-seed",
-    default=GenerativeTextScenario.get_default("random_seed"),
-    type=int,
-    help="The random seed to use for benchmarking to ensure reproducibility.",
-)
 # Backend configuration
 @click.option(
     "--backend",
@@ -197,9 +193,7 @@ def benchmark():
     default=None,
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs. Headers can be removed by setting their value to "
-        "null. For example: "
-        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
+        "dict with **kwargs."
     ),
 )
 @click.option(
@@ -212,6 +206,24 @@ def benchmark():
     ),
 )
 # Data configuration
+@click.option(
+    "--request-type",
+    default="chat_completions",
+    type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
+    help=(
+        "The type of request to create for each data sample and send to the backend. "
+        f"Supported types: {list(get_literal_vals(GenerativeRequestType))}."
+    ),
+)
+@click.option(
+    "--request-formatter-kwargs",
+    default=None,
+    callback=cli_tools.parse_json,
+    help=(
+        "A JSON string containing any arguments to pass to the request formatter "
+        "as a dict with **kwargs."
+    ),
+)
 @click.option(
     "--processor",
     default=None,
@@ -240,15 +252,52 @@ def benchmark():
         "as a dict with **kwargs."
     ),
 )
+@click.option(
+    "--data-samples",
+    default=-1,
+    type=int,
+    help=(
+        "The number of samples to use from the dataset. If -1 (default), will use all "
+        "samples in the dataset and dynamically generate samples. "
+        "If >1, will precompile that number of items from the dataset configs."
+    ),
+)
+@click.option(
+    "--data-column-mappings",
+    default=None,
+    callback=cli_tools.parse_json,
+    help=(
+        "A JSON string of column mappings to apply to the dataset to map into request "
+        "column types."
+    ),
+)
 @click.option(
     "--data-sampler",
     default=None,
-    type=click.Choice(["random"]),
+    type=click.Choice(["shuffle"]),
+    help="The data sampler type to use.",
+)
+@click.option(
+    "--data-num-workers",
+    default=1,
+    type=int,
+    help="The number of worker processes to use for data loading.",
+)
+@click.option(
+    "--dataloader-kwargs", "--dataloader_kwargs",  # underscore form kept as alias
+    default=None,
+    callback=cli_tools.parse_json,
     help=(
-        "The data sampler type to use. 'random' will add a random shuffle on the data. "
-        "Defaults to None"
+        "A JSON string containing any arguments to pass to the dataloader constructor "
+        "as a dict with **kwargs."
     ),
 )
+@click.option(
+    "--random-seed",
+    default=GenerativeTextScenario.get_default("random_seed"),
+    type=int,
+    help="The random seed to use for benchmarking to ensure reproducibility.",
+)
 # Output configuration
 @click.option(
     "--output-path",
@@ -378,16 +427,22 @@ def run(
     data,
     profile,
     rate,
-    random_seed,
     # Backend Configuration
     backend,
     backend_kwargs,
     model,
     # Data configuration
+    request_type,
+    request_formatter_kwargs,
     processor,
     processor_args,
     data_args,
+    data_samples,
+    data_column_mappings,
     data_sampler,
+    data_num_workers,
+    dataloader_kwargs,
+    random_seed,
     # Output configuration
     output_path,
     output_formats,
@@ -415,15 +470,21 @@ def run(
     Supports multiple backends, data sources, output formats, and constraint types
     for flexible benchmark configuration.
     """
+    data_request_formatter = (
+        request_type
+        if not request_formatter_kwargs
+        else {"request_type": request_type, **request_formatter_kwargs}
+    )
+
     if HAS_UVLOOP:
         asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
     asyncio.run(
         benchmark_generative_text(
             target=target,
-            data=data,
+            data=list(data),
+            # Benchmark configuration
             profile=profile,
             rate=rate,
-            random_seed=random_seed,
             # Backend configuration
             backend=backend,
             backend_kwargs=backend_kwargs,
@@ -432,7 +493,13 @@ def run(
             processor=processor,
             processor_args=processor_args,
             data_args=data_args,
+            data_samples=data_samples,
+            data_column_mapper=data_column_mappings,
+            data_request_formatter=data_request_formatter,
             data_sampler=data_sampler,
+            data_num_workers=data_num_workers,
+            dataloader_kwargs=dataloader_kwargs,
+            random_seed=random_seed,
             # Output configuration
             output_path=output_path,
             output_formats=[
@@ -455,7 +522,7 @@ def run(
             add_aggregators={"extras": InjectExtrasAggregator(extras=output_extras)},
             warmup=warmup,
             cooldown=cooldown,
-            request_samples=request_samples,
+            sample_requests=request_samples,
             # Constraints configuration
             max_seconds=max_seconds,
             max_requests=max_requests,

diff --git a/src/guidellm/backends/__init__.py b/src/guidellm/backends/__init__.py
@@ -13,6 +13,7 @@
     GenerationRequest,
     GenerationRequestTimings,
     GenerationResponse,
+    GenerationTokenStats,
 )
 from .openai import OpenAIHTTPBackend
 
@@ -22,5 +23,6 @@
     "GenerationRequest",
     "GenerationRequestTimings",
     "GenerationResponse",
+    "GenerationTokenStats",
     "OpenAIHTTPBackend",
 ]
diff --git a/src/guidellm/backends/backend.py b/src/guidellm/backends/backend.py
@@ -115,5 +115,6 @@ def requests_limit(self) -> int | None:
     async def default_model(self) -> str | None:
         """
         :return: The default model name or identifier for generation requests.
+            None if no default model is available.
         """
         ...