Skip to content

Commit 206fc79

Browse files
committed
Second Scenario implementation attempt
1 parent db7b534 commit 206fc79

File tree

3 files changed

+160
-34
lines changed

3 files changed

+160
-34
lines changed

src/guidellm/__main__.py

Lines changed: 84 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
import click
88

99
from guidellm.backend import BackendType
10-
from guidellm.benchmark import ProfileType, benchmark_generative_text
10+
from guidellm.benchmark import ProfileType
11+
from guidellm.benchmark.entrypoints import benchmark_with_scenario
12+
from guidellm.benchmark.scenario import GenerativeTextScenario
1113
from guidellm.config import print_config
1214
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
1315
from guidellm.scheduler import StrategyType
@@ -40,6 +42,19 @@ def parse_number_str(ctx, param, value): # noqa: ARG001
4042
) from err
4143

4244

45+
def set_if_not_default(ctx: click.Context, **kwargs):
    """
    Filter the given kwargs down to the options the user explicitly set.

    Any option whose value was filled in from the click default (as
    reported by ``ctx.get_parameter_source``) is dropped, so the result
    can be used to override a scenario's settings without clobbering
    them with defaults.
    """
    return {
        key: value
        for key, value in kwargs.items()
        if ctx.get_parameter_source(key) != click.core.ParameterSource.DEFAULT
    }
56+
57+
4358
@click.group()
4459
def cli():
4560
pass
@@ -48,6 +63,14 @@ def cli():
4863
@cli.command(
4964
help="Run a benchmark against a generative model using the specified arguments."
5065
)
66+
@click.option(
67+
"--scenario",
68+
type=str,
69+
default=None,
70+
help=(
71+
"TODO: A scenario or path to config"
72+
),
73+
)
5174
@click.option(
5275
"--target",
5376
required=True,
@@ -61,20 +84,20 @@ def cli():
6184
"The type of backend to use to run requests against. Defaults to 'openai_http'."
6285
f" Supported types: {', '.join(get_args(BackendType))}"
6386
),
64-
default="openai_http",
87+
default=GenerativeTextScenario.backend_type,
6588
)
6689
@click.option(
6790
"--backend-args",
6891
callback=parse_json,
69-
default=None,
92+
default=GenerativeTextScenario.backend_args,
7093
help=(
7194
"A JSON string containing any arguments to pass to the backend as a "
7295
"dict with **kwargs."
7396
),
7497
)
7598
@click.option(
7699
"--model",
77-
default=None,
100+
default=GenerativeTextScenario.model,
78101
type=str,
79102
help=(
80103
"The ID of the model to benchmark within the backend. "
@@ -83,7 +106,7 @@ def cli():
83106
)
84107
@click.option(
85108
"--processor",
86-
default=None,
109+
default=GenerativeTextScenario.processor,
87110
type=str,
88111
help=(
89112
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,7 +116,7 @@ def cli():
93116
)
94117
@click.option(
95118
"--processor-args",
96-
default=None,
119+
default=GenerativeTextScenario.processor_args,
97120
callback=parse_json,
98121
help=(
99122
"A JSON string containing any arguments to pass to the processor constructor "
@@ -112,6 +135,7 @@ def cli():
112135
)
113136
@click.option(
114137
"--data-args",
138+
default=GenerativeTextScenario.data_args,
115139
callback=parse_json,
116140
help=(
117141
"A JSON string containing any arguments to pass to the dataset creation "
@@ -120,7 +144,7 @@ def cli():
120144
)
121145
@click.option(
122146
"--data-sampler",
123-
default=None,
147+
default=GenerativeTextScenario.data_sampler,
124148
type=click.Choice(["random"]),
125149
help=(
126150
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -138,7 +162,7 @@ def cli():
138162
)
139163
@click.option(
140164
"--rate",
141-
default=None,
165+
default=GenerativeTextScenario.rate,
142166
callback=parse_number_str,
143167
help=(
144168
"The rates to run the benchmark at. "
@@ -152,6 +176,7 @@ def cli():
152176
@click.option(
153177
"--max-seconds",
154178
type=float,
179+
default=GenerativeTextScenario.max_seconds,
155180
help=(
156181
"The maximum number of seconds each benchmark can run for. "
157182
"If None, will run until max_requests or the data is exhausted."
@@ -160,6 +185,7 @@ def cli():
160185
@click.option(
161186
"--max-requests",
162187
type=int,
188+
default=GenerativeTextScenario.max_requests,
163189
help=(
164190
"The maximum number of requests each benchmark can run for. "
165191
"If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +194,7 @@ def cli():
168194
@click.option(
169195
"--warmup-percent",
170196
type=float,
171-
default=None,
197+
default=GenerativeTextScenario.warmup_percent,
172198
help=(
173199
"The percent of the benchmark (based on max-seconds, max-requets, "
174200
"or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +204,7 @@ def cli():
178204
@click.option(
179205
"--cooldown-percent",
180206
type=float,
207+
default=GenerativeTextScenario.cooldown_percent,
181208
help=(
182209
"The percent of the benchmark (based on max-seconds, max-requets, or lenth "
183210
"of dataset) to run as a cooldown and not include in the final results. "
@@ -187,16 +214,19 @@ def cli():
187214
@click.option(
188215
"--disable-progress",
189216
is_flag=True,
217+
default=not GenerativeTextScenario.show_progress,
190218
help="Set this flag to disable progress updates to the console",
191219
)
192220
@click.option(
193221
"--display-scheduler-stats",
194222
is_flag=True,
223+
default=GenerativeTextScenario.show_progress_scheduler_stats,
195224
help="Set this flag to display stats for the processes running the benchmarks",
196225
)
197226
@click.option(
198227
"--disable-console-outputs",
199228
is_flag=True,
229+
default=not GenerativeTextScenario.output_console,
200230
help="Set this flag to disable console output",
201231
)
202232
@click.option(
@@ -213,6 +243,7 @@ def cli():
213243
@click.option(
214244
"--output-extras",
215245
callback=parse_json,
246+
default=GenerativeTextScenario.output_extras,
216247
help="A JSON string of extra data to save with the output benchmarks",
217248
)
218249
@click.option(
@@ -222,15 +253,16 @@ def cli():
222253
"The number of samples to save in the output file. "
223254
"If None (default), will save all samples."
224255
),
225-
default=None,
256+
default=GenerativeTextScenario.output_sampling,
226257
)
227258
@click.option(
228259
"--random-seed",
229-
default=42,
260+
default=GenerativeTextScenario.random_seed,
230261
type=int,
231262
help="The random seed to use for benchmarking to ensure reproducibility.",
232263
)
233264
def benchmark(
265+
scenario,
234266
target,
235267
backend_type,
236268
backend_args,
@@ -254,30 +286,48 @@ def benchmark(
254286
output_sampling,
255287
random_seed,
256288
):
289+
click_ctx = click.get_current_context()
290+
291+
# If a scenario file was specified read from it
292+
# TODO: This should probably be a factory method
293+
if scenario is None:
294+
_scenario = {}
295+
else:
296+
# TODO: Support pre-defined scenarios
297+
# TODO: Support other formats
298+
with Path(scenario).open() as f:
299+
_scenario = json.load(f)
300+
301+
# If any command line arguments are specified, override the scenario
302+
_scenario.update(set_if_not_default(
303+
click_ctx,
304+
target=target,
305+
backend_type=backend_type,
306+
backend_args=backend_args,
307+
model=model,
308+
processor=processor,
309+
processor_args=processor_args,
310+
data=data,
311+
data_args=data_args,
312+
data_sampler=data_sampler,
313+
rate_type=rate_type,
314+
rate=rate,
315+
max_seconds=max_seconds,
316+
max_requests=max_requests,
317+
warmup_percent=warmup_percent,
318+
cooldown_percent=cooldown_percent,
319+
show_progress=not disable_progress,
320+
show_progress_scheduler_stats=display_scheduler_stats,
321+
output_console=not disable_console_outputs,
322+
output_path=output_path,
323+
output_extras=output_extras,
324+
output_sampling=output_sampling,
325+
random_seed=random_seed,
326+
))
327+
257328
asyncio.run(
258-
benchmark_generative_text(
259-
target=target,
260-
backend_type=backend_type,
261-
backend_args=backend_args,
262-
model=model,
263-
processor=processor,
264-
processor_args=processor_args,
265-
data=data,
266-
data_args=data_args,
267-
data_sampler=data_sampler,
268-
rate_type=rate_type,
269-
rate=rate,
270-
max_seconds=max_seconds,
271-
max_requests=max_requests,
272-
warmup_percent=warmup_percent,
273-
cooldown_percent=cooldown_percent,
274-
show_progress=not disable_progress,
275-
show_progress_scheduler_stats=display_scheduler_stats,
276-
output_console=not disable_console_outputs,
277-
output_path=output_path,
278-
output_extras=output_extras,
279-
output_sampling=output_sampling,
280-
random_seed=random_seed,
329+
benchmark_with_scenario(
330+
scenario=GenerativeTextScenario(**_scenario)
281331
)
282332
)
283333

src/guidellm/benchmark/entrypoints.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,23 @@
1515
)
1616
from guidellm.benchmark.profile import ProfileType, create_profile
1717
from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
18+
from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
1819
from guidellm.request import GenerativeRequestLoader
1920
from guidellm.scheduler import StrategyType
2021

22+
type benchmark_type = Literal["generative_text"]
23+
24+
25+
async def benchmark_with_scenario(scenario: Scenario, **kwargs):
    """
    Dispatch a benchmark run for the given scenario, forwarding any
    extra keyword arguments to the concrete benchmark entrypoint.
    """
    # Dispatch on the concrete scenario type; only generative-text
    # scenarios are supported so far.
    if not isinstance(scenario, GenerativeTextScenario):
        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
    return await benchmark_generative_text(**vars(scenario), **kwargs)
34+
2135

2236
async def benchmark_generative_text(
2337
target: str,

src/guidellm/benchmark/scenario.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from collections.abc import Iterable
2+
from pathlib import Path
3+
from typing import Any, Literal, Optional, Self, Union
4+
5+
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
6+
from transformers.tokenization_utils_base import ( # type: ignore[import]
7+
PreTrainedTokenizerBase,
8+
)
9+
10+
from guidellm.backend.backend import BackendType
11+
from guidellm.benchmark.profile import ProfileType
12+
from guidellm.objects.pydantic import StandardBaseModel
13+
from guidellm.scheduler.strategy import StrategyType
14+
15+
# The module export list must be spelled ``__all__`` (lowercase) for
# ``from ... import *`` and linters to honor it; ``__ALL__`` is ignored.
__all__ = ["Scenario", "GenerativeTextScenario"]
16+
17+
18+
class Scenario(StandardBaseModel):
    """
    Base model for benchmark scenarios.

    Concrete scenarios subclass this and add their own configuration
    fields; only the benchmark target is required here.
    """

    target: str

    def _update(self, **fields: Any) -> Self:
        """Assign each given field on self; unknown names raise ValueError."""
        for k, v in fields.items():
            if hasattr(self, k):
                setattr(self, k, v)
            else:
                raise ValueError(f"Invalid field {k}")
        return self

    def update(self, **fields: Any) -> Self:
        """Assign the given fields on self, skipping any whose value is None."""
        non_null = {k: v for k, v in fields.items() if v is not None}
        return self._update(**non_null)
31+
32+
33+
class GenerativeTextScenario(Scenario):
    """
    Scenario describing a generative-text benchmark run.

    The field defaults here double as the defaults for the ``benchmark``
    CLI command, which reads them via class-attribute access.

    NOTE(review): several annotations use non-pydantic-native types
    (PreTrainedTokenizerBase, Dataset, IterableDataset, ...) — confirm
    that StandardBaseModel enables ``arbitrary_types_allowed``, otherwise
    this model will fail to build.
    """

    # --- backend selection ---
    backend_type: BackendType = "openai_http"
    backend_args: Optional[dict[str, Any]] = None
    # Model ID within the backend; backend may infer it when None.
    model: Optional[str] = None
    # Tokenizer/processor used for token-count statistics.
    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
    processor_args: Optional[dict[str, Any]] = None
    # --- dataset configuration ---
    # Required: the data source — a path/name, raw samples, or a loaded
    # HuggingFace dataset object.
    data: Union[
        str,
        Path,
        Iterable[Union[str, dict[str, Any]]],
        Dataset,
        DatasetDict,
        IterableDataset,
        IterableDatasetDict,
    ]
    data_args: Optional[dict[str, Any]] = None
    data_sampler: Optional[Literal["random"]] = None
    # --- load profile ---
    # Required: scheduling strategy or profile type for request rate.
    rate_type: Union[StrategyType, ProfileType]
    rate: Optional[Union[int, float, list[Union[int, float]]]] = None
    # Stopping conditions; None means run until the other limit or the
    # data is exhausted.
    max_seconds: Optional[float] = None
    max_requests: Optional[int] = None
    # Warmup/cooldown fractions excluded from final results.
    warmup_percent: Optional[float] = None
    cooldown_percent: Optional[float] = None
    # --- console/progress output ---
    show_progress: bool = True
    # NOTE(review): the CLI exposes this as the store-true flag
    # --display-scheduler-stats; defaulting it to True makes that flag a
    # no-op — confirm this default is intended (the pre-scenario CLI
    # behavior defaulted to False).
    show_progress_scheduler_stats: bool = True
    output_console: bool = True
    # --- result output ---
    output_path: Optional[Union[str, Path]] = None
    output_extras: Optional[dict[str, Any]] = None
    # Number of samples to keep in the output; None keeps all.
    output_sampling: Optional[int] = None
    # Seed for reproducible benchmarking.
    random_seed: int = 42

0 commit comments

Comments
 (0)