2,465 changes: 1,331 additions & 1,134 deletions pylock.toml

Large diffs are not rendered by default.

25 changes: 15 additions & 10 deletions pyproject.toml
@@ -13,6 +13,13 @@ include = ["*"]
[tool.pdm]
distribution = true

[[tool.pdm.source]]
name = "torch"
type = "find_links"
#url = "https://download.pytorch.org/whl/cpu/torch_stable.html"
url = "https://download.pytorch.org/whl/cpu/torch/"
include_packages = ["torch"]


# ************************************************
# ********** Project Metadata **********
@@ -54,28 +61,26 @@ dependencies = [
"httpx[http2]<1.0.0",
"loguru",
"msgpack",
"numpy",
"numpy<2.0.0",
"pillow",
"protobuf",
"pydantic>=2.11.7",
"pydantic-settings>=2.0.0",
"pydub",
"pyyaml>=6.0.0",
"rich",
"sanic",
"transformers",
"uvloop>=0.18",
"librosa>=0.11.0",
"torch",
]

[project.optional-dependencies]
perf = [
"orjson",
"msgpack",
"msgspec",
"uvloop",
]
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
recommended = [
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
"tiktoken>=0.11.0", # For OpenAI tokenizer
"blobfile>=3.1.0", # For OpenAI tokenizer
]
dev = [
# build
@@ -118,7 +123,7 @@ dev = [
]

[dependency-groups]
dev = [ "guidellm[dev]" ]
dev = ["guidellm[dev]"]

[project.urls]
homepage = "https://github.com/vllm-project/guidellm"
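Note on the new `[[tool.pdm.source]]` entry above: the `find_links` source type points PDM's resolver at the CPU-only wheel index, and `include_packages` restricts it to `torch` so every other dependency still resolves from PyPI. A quick way to confirm the CPU build actually landed in the environment (a sketch; the version string shown is just an example):

```python
# Verify that the CPU-only torch wheel was installed: CPU builds report
# torch.version.cuda as None and typically carry a "+cpu" local version tag.
import torch

print(torch.__version__)  # e.g. "2.4.1+cpu" (example value)
assert torch.version.cuda is None, "expected a CPU-only torch build"
```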
3 changes: 3 additions & 0 deletions src/guidellm/__init__.py
@@ -7,6 +7,8 @@
import logging
import os

from datasets import config

with (
open(os.devnull, "w") as devnull, # noqa: PTH123
contextlib.redirect_stderr(devnull),
@@ -19,6 +21,7 @@
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
hf_logging.set_verbosity_error()
logging.getLogger("transformers").setLevel(logging.ERROR)
config.USE_AUDIO_DECODE = False

from .logger import configure_logger, logger
from .settings import (
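The pattern used in this file — importing noisy libraries with stderr redirected to `os.devnull`, then tightening their log levels afterwards — generalizes to any dependency that prints at import time. A minimal standalone sketch of the same idea:

```python
import contextlib
import logging
import os

# Swallow anything the library writes to stderr while it is being imported.
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
    import transformers  # noqa: F401  # example of an import-time-noisy library

# Afterwards, keep only errors from its logger.
logging.getLogger("transformers").setLevel(logging.ERROR)
```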
113 changes: 89 additions & 24 deletions src/guidellm/__main__.py
@@ -48,7 +48,6 @@
from guidellm.backends import BackendType
from guidellm.benchmark import (
GenerativeConsoleBenchmarkerProgress,
InjectExtrasAggregator,
ProfileType,
benchmark_generative_text,
reimport_benchmarks_report,
@@ -59,6 +58,7 @@
from guidellm.mock_server import MockServer, MockServerConfig
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
from guidellm.scheduler import StrategyType
from guidellm.schemas import GenerativeRequestType
from guidellm.settings import print_config
from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
from guidellm.utils import cli as cli_tools
@@ -143,6 +143,7 @@ def benchmark():
@click.option(
"--data",
type=str,
multiple=True,
help=(
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
"a path to a data file csv, json, jsonl, or txt, "
@@ -171,12 +172,6 @@ def benchmark():
"For rate-type=synchronous,throughput, this must not be set."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
# Backend configuration
@click.option(
"--backend",
@@ -197,9 +192,7 @@
default=None,
help=(
"A JSON string containing any arguments to pass to the backend as a "
"dict with **kwargs. Headers can be removed by setting their value to "
"null. For example: "
"""'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
"dict with **kwargs."
),
)
@click.option(
Expand All @@ -212,6 +205,24 @@ def benchmark():
),
)
# Data configuration
@click.option(
"--request-type",
default="chat_completions",
type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
help=(
"The type of request to create for each data sample and send to the backend. "
f"Supported types: {list(get_literal_vals(GenerativeRequestType))}."
),
)
@click.option(
"--request-formatter-kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the request formatter "
"as a dict with **kwargs."
),
)
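These two new options combine downstream into a single `data_request_formatter` argument (see the assignment at the top of `run()` further down in this diff). A sketch of the resulting value, with the kwargs dict as a made-up example:

```python
# Mirrors the combination logic in run(); the kwargs below are hypothetical.
request_type = "chat_completions"
request_formatter_kwargs = {"stream": True}  # assumed example kwargs

data_request_formatter = (
    request_type
    if not request_formatter_kwargs
    else {"request_type": request_type, **request_formatter_kwargs}
)
# -> {"request_type": "chat_completions", "stream": True}
```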
@click.option(
"--processor",
default=None,
@@ -233,22 +244,60 @@
)
@click.option(
"--data-args",
multiple=True,
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string containing any arguments to pass to the dataset creation "
"as a dict with **kwargs."
),
)
@click.option(
"--data-samples",
default=-1,
type=int,
help=(
"The number of samples to use from the dataset. If -1 (default), will use all "
"samples in the dataset and dynamically generate samples. "
"If >1, will precompile that number of items from the dataset configs."
),
)
@click.option(
"--data-column-mappings",
default=None,
callback=cli_tools.parse_json,
help=(
"A JSON string of column mappings to apply to the dataset to map into request "
"column types."
),
)
@click.option(
"--data-sampler",
default=None,
type=click.Choice(["random"]),
type=click.Choice(["shuffle"]),
help="The data sampler type to use.",
)
@click.option(
"--data-num-workers",
default=1,
type=int,
help="The number of worker processes to use for data loading.",
)
@click.option(
"--dataloader_kwargs",
default=None,
callback=cli_tools.parse_json,
help=(
"The data sampler type to use. 'random' will add a random shuffle on the data. "
"Defaults to None"
"A JSON string containing any arguments to pass to the dataloader constructor "
"as a dict with **kwargs."
),
)
@click.option(
"--random-seed",
default=GenerativeTextScenario.get_default("random_seed"),
type=int,
help="The random seed to use for benchmarking to ensure reproducibility.",
)
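Several of these options share the `cli_tools.parse_json` callback, which turns the raw `--*-kwargs` strings into dicts before they reach `run()`. The real implementation lives in `guidellm.utils`; a minimal Click callback with the same contract might look like this (an assumption for illustration, not the actual code):

```python
import json

import click

def parse_json(ctx, param, value):
    """Parse a JSON string option into a Python object; pass None through."""
    if value is None:
        return None
    try:
        return json.loads(value)
    except json.JSONDecodeError as err:
        raise click.BadParameter(f"{param.name} must be valid JSON: {err}") from err
```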
# Output configuration
@click.option(
"--output-path",
@@ -325,9 +374,9 @@ def benchmark():
),
)
@click.option(
"--request-samples",
"--sample-requests",
"--output-sampling", # legacy alias
"request_samples",
"sample_requests",
type=int,
help=(
"The number of samples for each request status and each benchmark to save "
@@ -378,28 +427,33 @@ def run(
data,
profile,
rate,
random_seed,
# Backend Configuration
backend,
backend_kwargs,
model,
# Data configuration
request_type,
request_formatter_kwargs,
processor,
processor_args,
data_args,
data_samples,
data_column_mappings,
data_sampler,
data_num_workers,
dataloader_kwargs,
random_seed,
# Output configuration
output_path,
output_formats,
# Updates configuration
disable_console_outputs,
disable_progress,
display_scheduler_stats,
# Aggregators configuration
output_extras,
# Benchmarker configuration
sample_requests,
warmup,
cooldown,
request_samples,
# Constraints configuration
max_seconds,
max_requests,
@@ -415,15 +469,21 @@
Supports multiple backends, data sources, output formats, and constraint types
for flexible benchmark configuration.
"""
data_request_formatter = (
request_type
if not request_formatter_kwargs
else {"request_type": request_type, **request_formatter_kwargs}
)

if HAS_UVLOOP:
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
asyncio.run(
benchmark_generative_text(
target=target,
data=data,
data=list(data),
# Benchmark configuration
profile=profile,
rate=rate,
random_seed=random_seed,
# Backend configuration
backend=backend,
backend_kwargs=backend_kwargs,
@@ -432,7 +492,13 @@ def run(
processor=processor,
processor_args=processor_args,
data_args=data_args,
data_samples=data_samples,
data_column_mapper=data_column_mappings,
data_request_formatter=data_request_formatter,
data_sampler=data_sampler,
data_num_workers=data_num_workers,
dataloader_kwargs=dataloader_kwargs,
random_seed=random_seed,
# Output configuration
output_path=output_path,
output_formats=[
@@ -451,11 +517,10 @@ def run(
else None
),
print_updates=not disable_console_outputs,
# Aggregators configuration
add_aggregators={"extras": InjectExtrasAggregator(extras=output_extras)},
# Benchmarker configuration
sample_requests=sample_requests,
warmup=warmup,
cooldown=cooldown,
request_samples=request_samples,
# Constraints configuration
max_seconds=max_seconds,
max_requests=max_requests,
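Putting the reworked signature together, the CLI's `run()` is now a thin wrapper over `benchmark_generative_text`. A programmatic equivalent of a simple invocation, as a sketch (the target URL and data file are hypothetical, and the remaining parameters are assumed to carry workable defaults):

```python
import asyncio

from guidellm.benchmark import benchmark_generative_text

asyncio.run(
    benchmark_generative_text(
        target="http://localhost:8000",             # hypothetical server
        data=["prompts.jsonl"],                     # hypothetical data file; note: a list
        profile="synchronous",
        data_request_formatter="chat_completions",  # plain string when no formatter kwargs
        random_seed=42,
    )
)
```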
20 changes: 12 additions & 8 deletions src/guidellm/backends/__init__.py
@@ -9,18 +9,22 @@
Backend,
BackendType,
)
from .objects import (
GenerationRequest,
GenerationRequestTimings,
GenerationResponse,
)
from .openai import OpenAIHTTPBackend
from .response_handlers import (
AudioResponseHandler,
ChatCompletionsResponseHandler,
GenerationResponseHandler,
GenerationResponseHandlerFactory,
TextCompletionsResponseHandler,
)

__all__ = [
"AudioResponseHandler",
"Backend",
"BackendType",
"GenerationRequest",
"GenerationRequestTimings",
"GenerationResponse",
"ChatCompletionsResponseHandler",
"GenerationResponseHandler",
"GenerationResponseHandlerFactory",
"OpenAIHTTPBackend",
"TextCompletionsResponseHandler",
]
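The net effect of this re-export change: `GenerationRequest`, `GenerationRequestTimings`, and `GenerationResponse` are no longer importable from `guidellm.backends` and now live in `guidellm.schemas` (as the `backend.py` diff below confirms), while the response handlers join the public backends API. Updated imports would look like:

```python
# Schemas moved out of guidellm.backends.objects into guidellm.schemas.
from guidellm.schemas import GenerationRequest, GenerationResponse

# Response handlers are now exported from guidellm.backends.
from guidellm.backends import (
    GenerationResponseHandlerFactory,
    OpenAIHTTPBackend,
)
```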
5 changes: 3 additions & 2 deletions src/guidellm/backends/backend.py
@@ -16,11 +16,11 @@
from abc import abstractmethod
from typing import Literal

from guidellm.backends.objects import (
from guidellm.scheduler import BackendInterface
from guidellm.schemas import (
GenerationRequest,
GenerationResponse,
)
from guidellm.scheduler import BackendInterface
from guidellm.utils import RegistryMixin

__all__ = [
@@ -115,5 +115,6 @@ def requests_limit(self) -> int | None:
async def default_model(self) -> str | None:
"""
:return: The default model name or identifier for generation requests.
None if no default model is available.
"""
...