
Commit 46a2c1e

[GuideLLM Refactor] Edge case errors (#376)
## Summary

This PR handles errors that occur when there are no successful requests. An error is still raised, but it is one the user can get useful information from, rather than an opaque failure from the inner workings.

## Details

- Adds a default value for an inner data type so that it works in this edge case.
- Adds an error check that raises a runtime error with an explanation of the failure. The wording of the error message can be adjusted if desired.
- Fixes a type literal mismatch.

## Test Plan

- Run GuideLLM against a mock server in a way that causes all requests to fail, for example by setting the max token value far too small.

---

- [x] "I certify that all code in this PR is my own, except as noted below."

## Use of AI

- [ ] Includes AI-assisted code completion
- [ ] Includes code generated by an AI application
- [ ] Includes AI-generated tests (NOTE: AI-written tests should have a docstring that includes `## WRITTEN BY AI ##`)

---------

Signed-off-by: Jared O'Connell <[email protected]>
1 parent da02ee8 commit 46a2c1e

File tree

2 files changed: +4, -3 lines


src/guidellm/benchmark/aggregator.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -441,7 +441,7 @@ def __call__(

     def compile(
         self, state: AggregatorState, scheduler_state: SchedulerState
-    ) -> dict[Literal["scheduler_stats"], BenchmarkSchedulerStats]:
+    ) -> dict[Literal["run_stats"], BenchmarkSchedulerStats]:
         """
         Compile scheduler timing metrics into benchmark statistics.

@@ -473,7 +473,7 @@ def compile(
                 key="worker_resolve_time", type_="avg", default=0.0
             ),
             worker_resolve_end_delay_avg=state.get_metric(
-                key="worker_resolve_end_delay", type_="avg"
+                key="worker_resolve_end_delay", type_="avg", default=0.0
             ),
             finalized_delay_avg=state.get_metric(
                 key="finalized_delay", type_="avg", default=0.0
```
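For context on the `default=0.0` addition: when every request fails, a metric such as `worker_resolve_end_delay` never receives a sample, so no average can be computed and the lookup would otherwise break. Below is a minimal sketch of that pattern using a simplified stand-in; the `DemoAggregatorState` class and its internals are hypothetical, and only the `get_metric` call shape and the metric key are taken from the diff above.

```python
from statistics import mean


class DemoAggregatorState:
    """Simplified, hypothetical stand-in illustrating the default-value pattern."""

    def __init__(self) -> None:
        self.samples: dict[str, list[float]] = {}

    def add(self, key: str, value: float) -> None:
        self.samples.setdefault(key, []).append(value)

    def get_metric(
        self, key: str, type_: str = "avg", default: float | None = None
    ) -> float | None:
        values = self.samples.get(key, [])
        if not values:
            # No successful request ever recorded a sample for this key,
            # so fall back to the default instead of failing.
            return default
        return mean(values) if type_ == "avg" else sum(values)


state = DemoAggregatorState()
# No samples were added, e.g. because every request errored out.
print(state.get_metric("worker_resolve_end_delay", type_="avg", default=0.0))  # 0.0
```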

src/guidellm/benchmark/profile.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -680,6 +680,8 @@ def next_strategy(
             self.throughput_rate = (
                 prev_benchmark.metrics.requests_per_second.successful.mean
             )
+            if self.synchronous_rate <= 0 and self.throughput_rate <= 0:
+                raise RuntimeError("Invalid rates in sweep; aborting. Were there any successful requests?")
             self.measured_rates = list(
                 np.linspace(
                     self.synchronous_rate,
@@ -698,7 +700,6 @@ def next_strategy(
                 if strat.type_ == self.strategy_type
             ]
         )
-
         if self.strategy_type == "constant":
            return AsyncConstantStrategy(
                rate=self.measured_rates[next_rate_index],
```
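The guard added to `next_strategy` is what turns this edge case into a readable error: if neither the synchronous nor the throughput benchmark produced a positive request rate, the sweep cannot interpolate rates and should stop with an explanation. A minimal sketch of how that behavior could be exercised in a test, using a simplified stand-in rather than the real `SweepProfile` (the `DemoSweepRates` class and its `check_rates` method are hypothetical; the condition and message mirror the diff above):

```python
import pytest


class DemoSweepRates:
    """Hypothetical stand-in for the rate bookkeeping done during a sweep."""

    def __init__(self, synchronous_rate: float, throughput_rate: float) -> None:
        self.synchronous_rate = synchronous_rate
        self.throughput_rate = throughput_rate

    def check_rates(self) -> None:
        # Mirrors the guard added in profile.py: both rates being
        # non-positive means no benchmark produced usable throughput.
        if self.synchronous_rate <= 0 and self.throughput_rate <= 0:
            raise RuntimeError(
                "Invalid rates in sweep; aborting. "
                "Were there any successful requests?"
            )


def test_sweep_with_no_successful_requests_raises():
    """Illustrative sketch only, not part of the PR. ## WRITTEN BY AI ##"""
    with pytest.raises(RuntimeError, match="successful requests"):
        DemoSweepRates(synchronous_rate=0.0, throughput_rate=0.0).check_rates()
```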
