vllm-project · AlonKellner-RedHat · Aug 13, 2025 · Aug 13, 2025 · Aug 15, 2025 · Aug 15, 2025
diff --git a/.gitignore b/.gitignore
@@ -230,3 +230,6 @@ src/ui/next-env.d.ts
 !src/ui/public/manifest.json
 !src/ui/serve.json
 .eslintcache
+
+# vllm-sim
+bin/
diff --git a/pyproject.toml b/pyproject.toml
@@ -82,6 +82,7 @@ dev = [
     "pytest-cov~=5.0.0",
     "pytest-mock~=3.14.0",
     "pytest-rerunfailures~=14.0",
+    "pytest-timeout~=2.4.0",
     "respx~=0.22.0",
 
     # code quality

diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
@@ -200,6 +200,33 @@ def benchmark():
         "Defaults to None."
     ),
 )
+@click.option(
+    "--max-errors",
+    type=click.IntRange(min=0),  # a negative error budget is meaningless
+    default=GenerativeTextScenario.get_default("max_errors"),
+    help=(
+        "The maximum number of errors allowed before stopping the benchmark. "
+        "Defaults to None."
+    ),
+)
+@click.option(
+    "--max-error-rate",
+    type=click.FloatRange(min=0.0, max=1.0),  # enforce documented 0-1 range
+    default=GenerativeTextScenario.get_default("max_error_rate"),
+    help=(
+        "The maximum error rate allowed before stopping the benchmark. "
+        "Should be a value between 0 and 1. Defaults to None."
+    ),
+)
+@click.option(
+    "--max-global-error-rate",
+    type=click.FloatRange(min=0.0, max=1.0),  # enforce documented 0-1 range
+    default=GenerativeTextScenario.get_default("max_global_error_rate"),
+    help=(
+        "The maximum global error rate allowed before stopping the benchmark. "
+        "Should be a value between 0 and 1. Defaults to None."
+    ),
+)
 @click.option(
     "--disable-progress",
     is_flag=True,
@@ -263,6 +290,9 @@ def run(
     max_requests,
     warmup_percent,
     cooldown_percent,
+    max_errors,
+    max_error_rate,
+    max_global_error_rate,
     disable_progress,
     display_scheduler_stats,
     disable_console_outputs,
@@ -290,6 +320,9 @@ def run(
         max_requests=max_requests,
         warmup_percent=warmup_percent,
         cooldown_percent=cooldown_percent,
+        max_errors=max_errors,
+        max_error_rate=max_error_rate,
+        max_global_error_rate=max_global_error_rate,
         output_sampling=output_sampling,
         random_seed=random_seed,
     )

diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py
@@ -5,6 +5,8 @@
 and timing utilities for standardized communication with LLM providers.
 """
 
+# Import backend implementations to trigger registration
+from . import openai  # noqa: F401
 from .backend import (
     Backend,
     BackendType,

diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py
@@ -1,19 +1,21 @@
-from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator
+from .aggregator import (
+    AggregatorT,
+    GenerativeRequestsAggregator,
+    SchedulerStatsAggregator,
+)
 from .benchmark import (
     Benchmark,
-    BenchmarkArgs,
     BenchmarkMetrics,
     BenchmarkSchedulerStats,
     BenchmarkT,
     GenerativeBenchmark,
+    GenerativeBenchmarksReport,
     GenerativeMetrics,
     GenerativeRequestStats,
-    GenerativeTextErrorStats,
-    StatusBreakdown,
 )
-from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
+from .benchmarker import Benchmarker
 from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
-from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
+from .output import GenerativeBenchmarkerConsole
 from .profile import (
     AsyncProfile,
     ConcurrentProfile,
@@ -22,46 +24,37 @@
     SweepProfile,
     SynchronousProfile,
     ThroughputProfile,
-    create_profile,
 )
 from .progress import (
-    BenchmarkerProgressDisplay,
-    BenchmarkerTaskProgressState,
-    GenerativeTextBenchmarkerProgressDisplay,
-    GenerativeTextBenchmarkerTaskProgressState,
+    BenchmarkerProgress,
+    BenchmarkerProgressGroup,
+    GenerativeConsoleBenchmarkerProgress,
 )
 
 __all__ = [
     "AggregatorT",
     "AsyncProfile",
     "Benchmark",
-    "BenchmarkAggregator",
-    "BenchmarkArgs",
     "BenchmarkMetrics",
     "BenchmarkSchedulerStats",
     "BenchmarkT",
     "Benchmarker",
-    "BenchmarkerProgressDisplay",
-    "BenchmarkerResult",
-    "BenchmarkerTaskProgressState",
+    "BenchmarkerProgress",
+    "BenchmarkerProgressGroup",
     "ConcurrentProfile",
     "GenerativeBenchmark",
-    "GenerativeBenchmarkAggregator",
-    "GenerativeBenchmarker",
-    "GenerativeBenchmarksConsole",
+    "GenerativeBenchmarkerConsole",
     "GenerativeBenchmarksReport",
+    "GenerativeConsoleBenchmarkerProgress",
     "GenerativeMetrics",
     "GenerativeRequestStats",
-    "GenerativeTextBenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
-    "GenerativeTextErrorStats",
+    "GenerativeRequestsAggregator",
     "Profile",
     "ProfileType",
-    "StatusBreakdown",
+    "SchedulerStatsAggregator",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
     "benchmark_generative_text",
-    "create_profile",
     "reimport_benchmarks_report",
 ]