Commit 640744c

style + type fixes
Parent: 039db66

7 files changed (+45 additions, -38 deletions)

src/guidellm/__main__.py

Lines changed: 13 additions & 8 deletions
@@ -164,14 +164,19 @@ def cli():
     ),
 )
 @click.option(
-    "--max-error-rate",
+    "--max-error",
     type=float,
     help=(
-        "The maximum error rate after which a benchmark will stop. "
-        "Applicable only for finite deterministic scenarios i.e "
-        "rate_type is 'constant' and 'max_seconds' exists OR "
-        "'max_requests' exists OR the dataset is finite. "
-        "If None or not applicable, benchmarks will continue regardless of error rate."
+        "The maximum error after which a benchmark will stop. "
+        "Can either be a rate, i.e. 0 < rate < 1, or a constant number. "
+        "If a rate is given and rate_type is 'constant' and 'max_seconds' exists, "
+        "then the rate is applied to the total expected "
+        "request count, i.e. rate * duration. If a rate is given and the "
+        "number of requests is not pre-determined, then a context window "
+        "of the most recent requests is checked. The window size "
+        "is configurable under GUIDELLM__ERROR_CHECK_WINDOW_SIZE. "
+        "If a number above 1 is given, then the total number "
+        "of errors is counted and checked against the threshold."
     ),
 )
 @click.option(
@@ -253,7 +258,7 @@ def benchmark(
     rate,
     max_seconds,
     max_requests,
-    max_error_rate,
+    max_error,
     warmup_percent,
     cooldown_percent,
     disable_progress,
@@ -279,7 +284,7 @@ def benchmark(
             rate=rate,
             max_seconds=max_seconds,
             max_requests=max_requests,
-            max_error_rate=max_error_rate,
+            max_error=max_error,
             warmup_percent=warmup_percent,
             cooldown_percent=cooldown_percent,
             show_progress=not disable_progress,
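
Note: the new help text above distinguishes a rate threshold (0 < rate < 1) from an absolute error count. A minimal sketch of that interpretation, with illustrative names that are not part of this commit:

from typing import Optional


def max_error_threshold(max_error: float, expected_requests: Optional[int]) -> Optional[int]:
    # Translate a --max-error value into an absolute error-count threshold.
    if max_error >= 1:
        # A number above 1 is treated as a total count of errored requests.
        return int(max_error)
    if expected_requests is not None:
        # A rate with a known request count (e.g. rate * duration for a
        # 'constant' rate_type with max_seconds) becomes a fixed count.
        return int(max_error * expected_requests)
    # Otherwise the rate is checked against a sliding window of recent requests
    # (window size from GUIDELLM__ERROR_CHECK_WINDOW_SIZE), so no fixed count exists.
    return None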

src/guidellm/benchmark/benchmark.py

Lines changed: 3 additions & 3 deletions
@@ -90,8 +90,8 @@ class BenchmarkArgs(StandardBaseModel):
     max_duration: Optional[float] = Field(
         description="The maximum duration in seconds to run this benchmark, if any."
     )
-    max_error_rate: Optional[float] = Field(
-        description="Maximum error rate after which a benchmark will stop."
+    max_error: Optional[float] = Field(
+        description="Maximum error rate or count after which a benchmark will stop."
     )
     warmup_number: Optional[int] = Field(
         description=(
@@ -220,7 +220,7 @@ class BenchmarkRunStats(StandardBaseModel):
         description=(
             "The number of errored requests divided by the number "
             "of successful and errored requests. "
-            "This can be higher than max_error_rate "
+            "This can be higher than max_error "
             "(if applicable) cause it does not take into "
             "account incomplete requests."
         )
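
For reference, the error_rate described above is a plain ratio of errored to completed requests; a quick illustration with invented numbers (not from the commit):

# 5 errored and 95 successful requests give error_rate = 5 / (5 + 95) = 0.05.
# Incomplete requests are excluded, so this ratio can exceed the max_error
# rate that triggered an early stop.
errored, successful = 5, 95
error_rate = errored / (successful + errored)
assert error_rate == 0.05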

src/guidellm/benchmark/benchmarker.py

Lines changed: 8 additions & 6 deletions
@@ -74,8 +74,10 @@ class BenchmarkerStrategyLimits(StandardBaseModel):
         description="Maximum duration (in seconds) to process requests per strategy.",
         ge=0,
     )
-    max_error_rate: Optional[float] = Field(
-        description="Maximum error rate after which a benchmark will stop",
+    max_error: Optional[float] = Field(
+        description="Maximum error after which a "
+        "benchmark will stop, "
+        "either a rate or a fixed number",
         ge=0,
     )
     warmup_percent_per_strategy: Optional[float] = Field(
@@ -152,7 +154,7 @@ async def run(
         profile: Profile,
         max_number_per_strategy: Optional[int],
         max_duration_per_strategy: Optional[float],
-        max_error_rate: Optional[float],
+        max_error: Optional[float],
         warmup_percent_per_strategy: Optional[float],
         cooldown_percent_per_strategy: Optional[float],
     ) -> AsyncGenerator[
@@ -167,7 +169,7 @@ async def run(
             requests_loader_size=requests_loader_size,
             max_number_per_strategy=max_number_per_strategy,
             max_duration_per_strategy=max_duration_per_strategy,
-            max_error_rate=max_error_rate,
+            max_error=max_error,
             warmup_percent_per_strategy=warmup_percent_per_strategy,
             cooldown_percent_per_strategy=cooldown_percent_per_strategy,
         )
@@ -202,7 +204,7 @@ async def run(
                 scheduling_strategy=scheduling_strategy,
                 max_number=max_number_per_strategy,
                 max_duration=max_duration_per_strategy,
-                max_error_rate=max_error_rate,
+                max_error=max_error,
             ):
                 if result.type_ == "run_start":
                     yield BenchmarkerResult(
@@ -328,7 +330,7 @@ def create_benchmark_aggregator(
             strategy=strategy,
             max_number=limits.max_number,
             max_duration=limits.max_duration,
-            max_error_rate=limits.max_error_rate,
+            max_error=limits.max_error,
             warmup_number=limits.warmup_number,
             warmup_duration=limits.warmup_duration,
             cooldown_number=limits.cooldown_number,

src/guidellm/benchmark/entrypoints.py

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ async def benchmark_generative_text(
     rate: Optional[Union[float, list[float]]],
     max_seconds: Optional[float],
     max_requests: Optional[int],
-    max_error_rate: Optional[float],
+    max_error: Optional[float],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
     show_progress: bool,
@@ -108,7 +108,7 @@ async def benchmark_generative_text(
         profile=profile,
         max_number_per_strategy=max_requests,
         max_duration_per_strategy=max_seconds,
-        max_error_rate=max_error_rate,
+        max_error=max_error,
         warmup_percent_per_strategy=warmup_percent,
         cooldown_percent_per_strategy=cooldown_percent,
     ):

src/guidellm/benchmark/output.py

Lines changed: 1 addition & 1 deletion
@@ -419,7 +419,7 @@ def benchmarks_args_str(self) -> str:
             {
                 "max_number": args.max_number,
                 "max_duration": args.max_duration,
-                "max_error_rate": args.max_error_rate,
+                "max_error": args.max_error,
                 "warmup_number": args.warmup_number,
                 "warmup_duration": args.warmup_duration,
                 "cooldown_number": args.cooldown_number,

src/guidellm/scheduler/result.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ class SchedulerRunInfo(StandardBaseModel):
     processes: int
     strategy: SchedulingStrategy
     last_requests_statuses: deque[RequestStatus]
-    max_error_rate: Optional[float] = None
+    max_error: Optional[float] = None

     created_requests: int = 0
     queued_requests: int = 0

src/guidellm/scheduler/scheduler.py

Lines changed: 17 additions & 17 deletions
@@ -76,7 +76,7 @@ async def run(
         scheduling_strategy: SchedulingStrategy,
         max_number: Optional[int] = None,
         max_duration: Optional[float] = None,
-        max_error_rate: Optional[float] = None,
+        max_error: Optional[float] = None,
     ) -> AsyncGenerator[
         Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None
     ]:
@@ -105,16 +105,16 @@ async def run(
         :param max_duration: The maximum duration for the scheduling run.
             If None, then no limit is set and either the iterator must be exhaustible
             or the max_number must be set.
-        :param max_error_rate: The maximum error rate after which the
-            scheduler shuts down.
+        :param max_error: The maximum error rate or count
+            after which the scheduler shuts down.
             Only applicable in benchmarks with finite deterministic number of requests.
             If None or not applicable then scheduler will continue regardless of errors.
         :return: An asynchronous generator that yields SchedulerResult objects.
             Each SchedulerResult object contains information about the request,
             the response, and the run information.
         """
         self._validate_scheduler_params(
-            scheduling_strategy, max_duration, max_error_rate, max_number
+            scheduling_strategy, max_duration, max_error, max_number
         )

         with (
134134
raise RuntimeError("shutdown_event is set before starting scheduling")
135135

136136
run_info, requests_iter, times_iter = self._run_setup(
137-
futures, scheduling_strategy, max_number, max_duration, max_error_rate
137+
futures, scheduling_strategy, max_number, max_duration, max_error
138138
)
139139
yield SchedulerResult(
140140
type_="run_start",
141141
run_info=run_info,
142142
)
143143

144144
try:
145-
max_error_rate_reached = False
146-
while not max_error_rate_reached:
145+
max_error_reached = False
146+
while not max_error_reached:
147147
# check errors and raise them
148148
for future in futures:
149149
if future.done() and (err := future.exception()) is not None:
@@ -173,13 +173,13 @@ async def run(
173173
if (
174174
iter_result.request_info.errored
175175
and not iter_result.request_info.canceled
176-
and self._is_max_error_rate_reached(iter_result.run_info)
176+
and self._is_max_error_reached(iter_result.run_info)
177177
):
178178
shutdown_event.set()
179-
max_error_rate_reached = True
179+
max_error_reached = True
180180
logger.info(
181181
f"Max error rate of "
182-
f"({iter_result.run_info.max_error_rate}) "
182+
f"({iter_result.run_info.max_error}) "
183183
f"reached, sending shutdown signal"
184184
)
185185
yield iter_result
@@ -200,7 +200,7 @@ def _validate_scheduler_params(
200200
self,
201201
scheduling_strategy: SchedulingStrategy,
202202
max_duration: Optional[float],
203-
max_error_rate: Optional[float],
203+
max_error: Optional[float],
204204
max_number: Optional[int],
205205
) -> None:
206206
if scheduling_strategy is None or not isinstance(
@@ -211,11 +211,11 @@ def _validate_scheduler_params(
211211
raise ValueError(f"Invalid max_number: {max_number}")
212212
if max_duration is not None and max_duration < 0:
213213
raise ValueError(f"Invalid max_duration: {max_duration}")
214-
if max_error_rate is not None and (max_error_rate < 0):
215-
raise ValueError(f"Invalid max_error_rate: {max_error_rate}")
214+
if max_error is not None and (max_error < 0):
215+
raise ValueError(f"Invalid max_error: {max_error}")
216216

217-
def _is_max_error_rate_reached(self, run_info: SchedulerRunInfo) -> bool:
218-
max_error = run_info.max_error_rate
217+
def _is_max_error_reached(self, run_info: SchedulerRunInfo) -> bool:
218+
max_error = run_info.max_error
219219
if max_error is None:
220220
return False
221221

@@ -322,7 +322,7 @@ def _run_setup(
322322
scheduling_strategy: SchedulingStrategy,
323323
max_number: Optional[int],
324324
max_duration: Optional[float],
325-
max_error_rate: Optional[float],
325+
max_error: Optional[float],
326326
) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
327327
requests_iter = iter(self.request_loader)
328328
start_time = time.time()
@@ -344,7 +344,7 @@ def _run_setup(
344344
end_number=end_number,
345345
processes=len(processes),
346346
strategy=scheduling_strategy,
347-
max_error_rate=max_error_rate,
347+
max_error=max_error,
348348
last_requests_statuses=collections.deque(
349349
maxlen=settings.error_check_window_size
350350
),
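
The commit renames _is_max_error_rate_reached to _is_max_error_reached but does not show its full body in these hunks. A hedged sketch of how a rate-or-count check over the recent-request window could work, with illustrative names and an assumed status check; the real implementation may differ:

from collections import deque


def is_max_error_reached(max_error: float, errored_requests: int,
                         last_requests_statuses: deque) -> bool:
    # Sketch only: assumes each status in the deque can be compared to "error".
    if max_error >= 1:
        # Absolute threshold: stop once the total error count crosses it.
        return errored_requests >= max_error
    if not last_requests_statuses:
        return False
    # Rate threshold: share of errors within the sliding window, whose size is
    # bounded by settings.error_check_window_size in the diff above.
    window_errors = sum(1 for status in last_requests_statuses if status == "error")
    return window_errors / len(last_requests_statuses) >= max_error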
