Commit 640744c

style + type fixes
Parent: 039db66

7 files changed (+45 additions, -38 deletions)

src/guidellm/__main__.py

Lines changed: 13 additions & 8 deletions
@@ -164,14 +164,19 @@ def cli():
     ),
 )
 @click.option(
-    "--max-error-rate",
+    "--max-error",
     type=float,
     help=(
-        "The maximum error rate after which a benchmark will stop. "
-        "Applicable only for finite deterministic scenarios i.e "
-        "rate_type is 'constant' and 'max_seconds' exists OR "
-        "'max_requests' exists OR the dataset is finite. "
-        "If None or not applicable, benchmarks will continue regardless of error rate."
+        "The maximum error after which a benchmark will stop. "
+        "Can either be a rate, i.e. 0 < rate < 1, or a constant number. "
+        "If a rate is given and rate_type is 'constant' and 'max_seconds' exists, "
+        "then the rate is applied to the total expected "
+        "request count, i.e. rate * duration. If a rate is given and the "
+        "number of requests is not pre-determined, then a context window "
+        "of the most recent requests is checked. The window size "
+        "is configurable under GUIDELLM__ERROR_CHECK_WINDOW_SIZE. "
+        "If a number above 1 is given, then the total number "
+        "of errors is counted and checked against the threshold."
     ),
 )
 @click.option(
@@ -253,7 +258,7 @@ def benchmark(
     rate,
     max_seconds,
     max_requests,
-    max_error_rate,
+    max_error,
     warmup_percent,
     cooldown_percent,
     disable_progress,
@@ -279,7 +284,7 @@ def benchmark(
             rate=rate,
             max_seconds=max_seconds,
             max_requests=max_requests,
-            max_error_rate=max_error_rate,
+            max_error=max_error,
             warmup_percent=warmup_percent,
             cooldown_percent=cooldown_percent,
             show_progress=not disable_progress,
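
Note: the new help text above distinguishes a rate threshold (0 < rate < 1) from an absolute error count. A minimal sketch of that interpretation, with illustrative names that are not part of this commit:

from typing import Optional


def max_error_threshold(max_error: float, expected_requests: Optional[int]) -> Optional[int]:
    # Translate a --max-error value into an absolute error-count threshold.
    if max_error >= 1:
        # A number above 1 is treated as a total count of errored requests.
        return int(max_error)
    if expected_requests is not None:
        # A rate with a known request count (e.g. rate * duration for a
        # 'constant' rate_type with max_seconds) becomes a fixed count.
        return int(max_error * expected_requests)
    # Otherwise the rate is checked against a sliding window of recent requests
    # (window size from GUIDELLM__ERROR_CHECK_WINDOW_SIZE), so no fixed count exists.
    return None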

src/guidellm/benchmark/benchmark.py

Lines changed: 3 additions & 3 deletions
@@ -90,8 +90,8 @@ class BenchmarkArgs(StandardBaseModel):
     max_duration: Optional[float] = Field(
         description="The maximum duration in seconds to run this benchmark, if any."
     )
-    max_error_rate: Optional[float] = Field(
-        description="Maximum error rate after which a benchmark will stop."
+    max_error: Optional[float] = Field(
+        description="Maximum error rate or count after which a benchmark will stop."
     )
     warmup_number: Optional[int] = Field(
         description=(
@@ -220,7 +220,7 @@ class BenchmarkRunStats(StandardBaseModel):
         description=(
             "The number of errored requests divided by the number "
             "of successful and errored requests. "
-            "This can be higher than max_error_rate "
+            "This can be higher than max_error "
             "(if applicable) cause it does not take into "
             "account incomplete requests."
         )
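
For reference, the error_rate described above is a plain ratio of errored to completed requests; a quick illustration with invented numbers (not from the commit):

# 5 errored and 95 successful requests give error_rate = 5 / (5 + 95) = 0.05.
# Incomplete requests are excluded, so this ratio can exceed the max_error
# rate that triggered an early stop.
errored, successful = 5, 95
error_rate = errored / (successful + errored)
assert error_rate == 0.05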

src/guidellm/benchmark/benchmarker.py

Lines changed: 8 additions & 6 deletions
@@ -74,8 +74,10 @@ class BenchmarkerStrategyLimits(StandardBaseModel):
         description="Maximum duration (in seconds) to process requests per strategy.",
         ge=0,
     )
-    max_error_rate: Optional[float] = Field(
-        description="Maximum error rate after which a benchmark will stop",
+    max_error: Optional[float] = Field(
+        description="Maximum error after which a "
+        "benchmark will stop, "
+        "either a rate or a fixed number",
         ge=0,
     )
     warmup_percent_per_strategy: Optional[float] = Field(
@@ -152,7 +154,7 @@ async def run(
         profile: Profile,
         max_number_per_strategy: Optional[int],
         max_duration_per_strategy: Optional[float],
-        max_error_rate: Optional[float],
+        max_error: Optional[float],
         warmup_percent_per_strategy: Optional[float],
         cooldown_percent_per_strategy: Optional[float],
     ) -> AsyncGenerator[
@@ -167,7 +169,7 @@ async def run(
             requests_loader_size=requests_loader_size,
             max_number_per_strategy=max_number_per_strategy,
             max_duration_per_strategy=max_duration_per_strategy,
-            max_error_rate=max_error_rate,
+            max_error=max_error,
             warmup_percent_per_strategy=warmup_percent_per_strategy,
             cooldown_percent_per_strategy=cooldown_percent_per_strategy,
         )
@@ -202,7 +204,7 @@ async def run(
                 scheduling_strategy=scheduling_strategy,
                 max_number=max_number_per_strategy,
                 max_duration=max_duration_per_strategy,
-                max_error_rate=max_error_rate,
+                max_error=max_error,
             ):
                 if result.type_ == "run_start":
                     yield BenchmarkerResult(
@@ -328,7 +330,7 @@ def create_benchmark_aggregator(
             strategy=strategy,
             max_number=limits.max_number,
             max_duration=limits.max_duration,
-            max_error_rate=limits.max_error_rate,
+            max_error=limits.max_error,
             warmup_number=limits.warmup_number,
             warmup_duration=limits.warmup_duration,
             cooldown_number=limits.cooldown_number,

src/guidellm/benchmark/entrypoints.py

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ async def benchmark_generative_text(
     rate: Optional[Union[float, list[float]]],
     max_seconds: Optional[float],
     max_requests: Optional[int],
-    max_error_rate: Optional[float],
+    max_error: Optional[float],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
     show_progress: bool,
@@ -108,7 +108,7 @@ async def benchmark_generative_text(
         profile=profile,
         max_number_per_strategy=max_requests,
         max_duration_per_strategy=max_seconds,
-        max_error_rate=max_error_rate,
+        max_error=max_error,
         warmup_percent_per_strategy=warmup_percent,
         cooldown_percent_per_strategy=cooldown_percent,
     ):

src/guidellm/benchmark/output.py

Lines changed: 1 addition & 1 deletion
@@ -419,7 +419,7 @@ def benchmarks_args_str(self) -> str:
             {
                 "max_number": args.max_number,
                 "max_duration": args.max_duration,
-                "max_error_rate": args.max_error_rate,
+                "max_error": args.max_error,
                 "warmup_number": args.warmup_number,
                 "warmup_duration": args.warmup_duration,
                 "cooldown_number": args.cooldown_number,

src/guidellm/scheduler/result.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ class SchedulerRunInfo(StandardBaseModel):
     processes: int
     strategy: SchedulingStrategy
     last_requests_statuses: deque[RequestStatus]
-    max_error_rate: Optional[float] = None
+    max_error: Optional[float] = None

     created_requests: int = 0
     queued_requests: int = 0

src/guidellm/scheduler/scheduler.py

Lines changed: 17 additions & 17 deletions
@@ -76,7 +76,7 @@ async def run(
         scheduling_strategy: SchedulingStrategy,
         max_number: Optional[int] = None,
         max_duration: Optional[float] = None,
-        max_error_rate: Optional[float] = None,
+        max_error: Optional[float] = None,
     ) -> AsyncGenerator[
         Union[SchedulerResult, SchedulerRequestResult[RequestT, ResponseT]], None
     ]:
@@ -105,16 +105,16 @@ async def run(
         :param max_duration: The maximum duration for the scheduling run.
             If None, then no limit is set and either the iterator must be exhaustible
             or the max_number must be set.
-        :param max_error_rate: The maximum error rate after which the
-            scheduler shuts down.
+        :param max_error: The maximum error rate or count
+            after which the scheduler shuts down.
             Only applicable in benchmarks with finite deterministic number of requests.
             If None or not applicable then scheduler will continue regardless of errors.
         :return: An asynchronous generator that yields SchedulerResult objects.
             Each SchedulerResult object contains information about the request,
             the response, and the run information.
         """
         self._validate_scheduler_params(
-            scheduling_strategy, max_duration, max_error_rate, max_number
+            scheduling_strategy, max_duration, max_error, max_number
         )

         with (
134134
raise RuntimeError("shutdown_event is set before starting scheduling")
135135

136136
run_info, requests_iter, times_iter = self._run_setup(
137-
futures, scheduling_strategy, max_number, max_duration, max_error_rate
137+
futures, scheduling_strategy, max_number, max_duration, max_error
138138
)
139139
yield SchedulerResult(
140140
type_="run_start",
141141
run_info=run_info,
142142
)
143143

144144
try:
145-
max_error_rate_reached = False
146-
while not max_error_rate_reached:
145+
max_error_reached = False
146+
while not max_error_reached:
147147
# check errors and raise them
148148
for future in futures:
149149
if future.done() and (err := future.exception()) is not None:
@@ -173,13 +173,13 @@ async def run(
173173
if (
174174
iter_result.request_info.errored
175175
and not iter_result.request_info.canceled
176-
and self._is_max_error_rate_reached(iter_result.run_info)
176+
and self._is_max_error_reached(iter_result.run_info)
177177
):
178178
shutdown_event.set()
179-
max_error_rate_reached = True
179+
max_error_reached = True
180180
logger.info(
181181
f"Max error rate of "
182-
f"({iter_result.run_info.max_error_rate}) "
182+
f"({iter_result.run_info.max_error}) "
183183
f"reached, sending shutdown signal"
184184
)
185185
yield iter_result
@@ -200,7 +200,7 @@ def _validate_scheduler_params(
200200
self,
201201
scheduling_strategy: SchedulingStrategy,
202202
max_duration: Optional[float],
203-
max_error_rate: Optional[float],
203+
max_error: Optional[float],
204204
max_number: Optional[int],
205205
) -> None:
206206
if scheduling_strategy is None or not isinstance(
@@ -211,11 +211,11 @@ def _validate_scheduler_params(
211211
raise ValueError(f"Invalid max_number: {max_number}")
212212
if max_duration is not None and max_duration < 0:
213213
raise ValueError(f"Invalid max_duration: {max_duration}")
214-
if max_error_rate is not None and (max_error_rate < 0):
215-
raise ValueError(f"Invalid max_error_rate: {max_error_rate}")
214+
if max_error is not None and (max_error < 0):
215+
raise ValueError(f"Invalid max_error: {max_error}")
216216

217-
def _is_max_error_rate_reached(self, run_info: SchedulerRunInfo) -> bool:
218-
max_error = run_info.max_error_rate
217+
def _is_max_error_reached(self, run_info: SchedulerRunInfo) -> bool:
218+
max_error = run_info.max_error
219219
if max_error is None:
220220
return False
221221

@@ -322,7 +322,7 @@ def _run_setup(
322322
scheduling_strategy: SchedulingStrategy,
323323
max_number: Optional[int],
324324
max_duration: Optional[float],
325-
max_error_rate: Optional[float],
325+
max_error: Optional[float],
326326
) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
327327
requests_iter = iter(self.request_loader)
328328
start_time = time.time()
@@ -344,7 +344,7 @@ def _run_setup(
344344
end_number=end_number,
345345
processes=len(processes),
346346
strategy=scheduling_strategy,
347-
max_error_rate=max_error_rate,
347+
max_error=max_error,
348348
last_requests_statuses=collections.deque(
349349
maxlen=settings.error_check_window_size
350350
),
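
The commit renames _is_max_error_rate_reached to _is_max_error_reached but does not show its full body in these hunks. A hedged sketch of how a rate-or-count check over the recent-request window could work, with illustrative names and an assumed status check; the real implementation may differ:

from collections import deque


def is_max_error_reached(max_error: float, errored_requests: int,
                         last_requests_statuses: deque) -> bool:
    # Sketch only: assumes each status in the deque can be compared to "error".
    if max_error >= 1:
        # Absolute threshold: stop once the total error count crosses it.
        return errored_requests >= max_error
    if not last_requests_statuses:
        return False
    # Rate threshold: share of errors within the sliding window, whose size is
    # bounded by settings.error_check_window_size in the diff above.
    window_errors = sum(1 for status in last_requests_statuses if status == "error")
    return window_errors / len(last_requests_statuses) >= max_error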
