Commit 06292d4

feat: termination reason present and E2E tested
1 parent 24c6ea9 commit 06292d4

12 files changed: +195 −75 lines

src/guidellm/benchmark/aggregator.py

Lines changed: 16 additions & 8 deletions
```diff
@@ -1,19 +1,13 @@
 import time
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import (
-    Any,
-    Generic,
-    Literal,
-    Optional,
-    TypeVar,
-    Union,
-)
+from typing import Any, Generic, Literal, Optional, TypeVar, Union, get_args
 
 from pydantic import Field
 
 from guidellm.backend import ResponseSummary
 from guidellm.benchmark.benchmark import (
+    REASON_STATUS_MAPPING,
     BenchmarkArgs,
     BenchmarkRunStats,
     BenchmarkT,
@@ -40,6 +34,7 @@
     SchedulerRequestResult,
     WorkerDescription,
 )
+from guidellm.scheduler.result import TerminationReason
 from guidellm.utils import check_load_processor
 
 __all__ = [
@@ -305,6 +300,12 @@ class BenchmarkAggregator(
             total=None,
         ),
     )
+    termination_reason: TerminationReason = Field(
+        description=(
+            f"The benchmark termination reason, one of: {get_args(TerminationReason)}"
+        ),
+        default="interrupted",
+    )
 
     def add_result(
         self,
@@ -444,6 +445,9 @@ def add_result(
 
         return True
 
+    def set_termination_reason(self, termination_reason: TerminationReason) -> None:
+        self.termination_reason = termination_reason
+
     @abstractmethod
     def compile(self) -> BenchmarkT:
         """
@@ -602,6 +606,8 @@ def compile(self) -> GenerativeBenchmark:
 
         error_rate = self._calculate_error_rate()
 
+        termination_reason = self.termination_reason
+
         return GenerativeBenchmark.from_stats(
             run_id=self.run_id,
             successful=successful,
@@ -628,6 +634,8 @@ def compile(self) -> GenerativeBenchmark:
                 request_time_delay_avg=self.requests_stats.request_time_delay.mean,
                 request_time_avg=self.requests_stats.request_time.mean,
                 error_rate=error_rate,
+                status=REASON_STATUS_MAPPING[termination_reason],
+                termination_reason=termination_reason,
             ),
             worker=self.worker_description,
             requests_loader=self.request_loader_description,
```

src/guidellm/benchmark/benchmark.py

Lines changed: 22 additions & 1 deletion
```diff
@@ -1,6 +1,6 @@
 import random
 import uuid
-from typing import Any, Literal, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, TypeVar, Union, get_args
 
 from pydantic import Field, computed_field
 
@@ -32,6 +32,7 @@
     ThroughputStrategy,
     WorkerDescription,
 )
+from guidellm.scheduler.result import TerminationReason
 
 __all__ = [
     "Benchmark",
@@ -46,6 +47,14 @@
     "StatusBreakdown",
 ]
 
+BenchmarkStatus = Literal["success", "error", "interrupted"]
+REASON_STATUS_MAPPING: dict[TerminationReason, BenchmarkStatus] = {
+    "interrupted": "interrupted",
+    "max_error_reached": "error",
+    "max_seconds_reached": "success",
+    "max_requests_reached": "success",
+}
+
 
 class BenchmarkArgs(StandardBaseModel):
     """
@@ -225,6 +234,18 @@ class BenchmarkRunStats(StandardBaseModel):
             "account incomplete requests."
         )
     )
+    status: BenchmarkStatus = Field(
+        description=(
+            f"The status of the benchmark output, "
+            f"one of the following options: {get_args(BenchmarkStatus)}."
+        )
+    )
+    termination_reason: TerminationReason = Field(
+        description=(
+            "The reason for the benchmark termination, "
+            f"one of the following options: {get_args(TerminationReason)}."
+        )
+    )
 
 
 class BenchmarkMetrics(StandardBaseModel):
```
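
The mapping above is the crux of the commit: a fine-grained termination reason collapses into a coarse benchmark status. A self-contained sketch (names mirror this diff; the snippet itself is not part of the commit) of how `get_args` serves both the field descriptions and a sanity check:

```python
from typing import Literal, get_args

BenchmarkStatus = Literal["success", "error", "interrupted"]
TerminationReason = Literal[
    "interrupted", "max_error_reached", "max_seconds_reached", "max_requests_reached"
]
REASON_STATUS_MAPPING: dict[str, str] = {
    "interrupted": "interrupted",
    "max_error_reached": "error",
    "max_seconds_reached": "success",
    "max_requests_reached": "success",
}

# get_args turns a Literal into its tuple of options, which is what the
# Field descriptions above interpolate:
print(get_args(BenchmarkStatus))  # ('success', 'error', 'interrupted')

# Every reason resolves to a status, so the direct indexing done in
# aggregator.py (REASON_STATUS_MAPPING[termination_reason]) cannot KeyError:
assert set(get_args(TerminationReason)) == set(REASON_STATUS_MAPPING)
```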

src/guidellm/benchmark/benchmarker.py

Lines changed: 11 additions & 4 deletions
```diff
@@ -74,7 +74,7 @@ class BenchmarkerStrategyLimits(StandardBaseModel):
         description="Maximum duration (in seconds) to process requests per strategy.",
         ge=0,
     )
-    max_error: Optional[float] = Field(
+    max_error_per_strategy: Optional[float] = Field(
         description="Maximum error after which a "
         "benchmark will stop,"
         " either rate or fixed number",
@@ -105,6 +105,10 @@ def max_number(self) -> Optional[int]:
     def max_duration(self) -> Optional[float]:
         return self.max_duration_per_strategy
 
+    @property
+    def max_error(self) -> Optional[float]:
+        return self.max_error_per_strategy
+
     @property
     def warmup_number(self) -> Optional[int]:
         if self.warmup_percent_per_strategy is None or self.max_number is None:
@@ -154,7 +158,7 @@ async def run(
         profile: Profile,
         max_number_per_strategy: Optional[int],
         max_duration_per_strategy: Optional[float],
-        max_error: Optional[float],
+        max_error_per_strategy: Optional[float],
         warmup_percent_per_strategy: Optional[float],
         cooldown_percent_per_strategy: Optional[float],
     ) -> AsyncGenerator[
@@ -169,7 +173,7 @@ async def run(
             requests_loader_size=requests_loader_size,
             max_number_per_strategy=max_number_per_strategy,
             max_duration_per_strategy=max_duration_per_strategy,
-            max_error=max_error,
+            max_error_per_strategy=max_error_per_strategy,
             warmup_percent_per_strategy=warmup_percent_per_strategy,
             cooldown_percent_per_strategy=cooldown_percent_per_strategy,
         )
@@ -204,7 +208,7 @@ async def run(
             scheduling_strategy=scheduling_strategy,
             max_number=max_number_per_strategy,
             max_duration=max_duration_per_strategy,
-            max_error=max_error,
+            max_error=max_error_per_strategy,
         ):
             if result.type_ == "run_start":
                 yield BenchmarkerResult(
@@ -219,6 +223,9 @@ async def run(
                     current_result=None,
                 )
             elif result.type_ == "run_complete":
+                aggregator.set_termination_reason(
+                    result.run_info.termination_reason
+                )
                 yield BenchmarkerResult(
                     type_="scheduler_complete",
                     start_time=start_time,
```
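
The rename is paired with a read-only alias so the class keeps the naming convention of its sibling limits (`max_number`, `max_duration`). A minimal sketch of the pattern, using a stand-in pydantic model rather than the commit's actual class:

```python
from typing import Optional

from pydantic import BaseModel, Field


class LimitsSketch(BaseModel):
    # The stored field carries the explicit per-strategy name...
    max_error_per_strategy: Optional[float] = Field(default=None)

    # ...while a property exposes the short name used by internal callers.
    @property
    def max_error(self) -> Optional[float]:
        return self.max_error_per_strategy


limits = LimitsSketch(max_error_per_strategy=0.05)
assert limits.max_error == 0.05
```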

src/guidellm/benchmark/entrypoints.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -120,7 +120,7 @@ async def benchmark_generative_text(
         profile=profile,
         max_number_per_strategy=max_requests,
         max_duration_per_strategy=max_seconds,
-        max_error=max_error,
+        max_error_per_strategy=max_error,
         warmup_percent_per_strategy=warmup_percent,
         cooldown_percent_per_strategy=cooldown_percent,
     ):
```

src/guidellm/scheduler/result.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -18,6 +18,9 @@
 
 
 RequestStatus = Literal["success", "error"]
+TerminationReason = Literal[
+    "interrupted", "max_error_reached", "max_seconds_reached", "max_requests_reached"
+]
 
 
 class SchedulerRunInfo(StandardBaseModel):
@@ -60,6 +63,8 @@ class SchedulerRunInfo(StandardBaseModel):
     completed_requests: int = 0
     errored_requests: int = 0
 
+    termination_reason: TerminationReason = "interrupted"
+
 
 class SchedulerRequestInfo(StandardBaseModel):
     """
```

src/guidellm/scheduler/scheduler.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -177,6 +177,7 @@ async def run(
         ):
             shutdown_event.set()
             max_error_reached = True
+            run_info.termination_reason = "max_error_reached"
             logger.info(
                 f"Max error rate of "
                 f"({iter_result.run_info.max_error}) "
@@ -394,11 +395,16 @@ def _add_requests(
             and added_count < settings.max_add_requests_per_loop
         ):
             if run_info.created_requests >= run_info.end_number:
+                if time.time() >= run_info.end_time - 1:
+                    run_info.termination_reason = "max_seconds_reached"
+                else:
+                    run_info.termination_reason = "max_requests_reached"
                 raise StopIteration
 
             if (
                 request_time := next(times_iter)
             ) >= run_info.end_time or time.time() >= run_info.end_time:
+                run_info.termination_reason = "max_seconds_reached"
                 raise StopIteration
 
             request = next(requests_iter)
```
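
The first hunk in `_add_requests` breaks a tie: hitting the request cap within the last second of the run window is credited to the time limit, since both caps expire together there. A hedged reading of that one-second heuristic as a standalone function (names assumed, not from the commit):

```python
def classify_stop(now: float, end_time: float) -> str:
    # Within the final second of the window, prefer the time-based reason;
    # otherwise the request cap is what genuinely ended the run.
    return "max_seconds_reached" if now >= end_time - 1 else "max_requests_reached"


assert classify_stop(now=99.5, end_time=100.0) == "max_seconds_reached"
assert classify_stop(now=42.0, end_time=100.0) == "max_requests_reached"
```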

tests/e2e/test_basic.py

Lines changed: 0 additions & 60 deletions
This file was deleted.

tests/e2e/test_interrupted.py renamed to tests/e2e/test_failed_benchmark.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -74,6 +74,15 @@ def test_interrupted_report(server: VllmSimServer):
         assert "errored" in requests
         errored = requests["errored"]
         assert len(errored) / (len(successful) + len(errored)) > max_error_rate
+
+        assert "run_stats" in benchmark
+        run_stats = benchmark["run_stats"]
+        assert "status" in run_stats
+        status = run_stats["status"]
+        assert status == "error"
+        assert "termination_reason" in run_stats
+        termination_reason = run_stats["termination_reason"]
+        assert termination_reason == "max_error_reached"
     finally:
         if report_path.exists():
             report_path.unlink()
```
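
For a run that trips the error cap, the saved report's `run_stats` section now carries both new fields; an illustration of the shape, inferred from the assertions above rather than from an actual report:

```python
# Hypothetical excerpt of benchmark["run_stats"] for the max-error case:
run_stats = {
    "status": "error",                          # coarse outcome: success/error/interrupted
    "termination_reason": "max_error_reached",  # the specific cap that ended the run
    # ...the pre-existing BenchmarkRunStats fields...
}
```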
