Skip to content

Commit 6c6c15a

Browse files
Support more rate type as well as const error count value
1 parent f11da24 commit 6c6c15a

File tree

4 files changed

+43
-30
lines changed

4 files changed

+43
-30
lines changed

src/guidellm/benchmark/benchmarker.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ class BenchmarkerStrategyLimits(StandardBaseModel):
7777
max_error_rate: Optional[float] = Field(
7878
description="Maximum error rate after which a benchmark will stop",
7979
ge=0,
80-
le=1,
8180
)
8281
warmup_percent_per_strategy: Optional[float] = Field(
8382
description="Percentage of requests to use for warmup.",

src/guidellm/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class Settings(BaseSettings):
114114
logging: LoggingSettings = LoggingSettings()
115115
default_sweep_number: int = 10
116116
shutdown_poll_interval_seconds: float = 10
117-
constant_error_check_window_size = 100
117+
error_check_window_size: int = 10
118118

119119
# HTTP settings
120120
request_follow_redirects: bool = True

src/guidellm/scheduler/result.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
]
1818

1919

20-
RequestStatus = Literal["success" | "error"]
20+
RequestStatus = Literal["success", "error"]
21+
2122

2223
class SchedulerRunInfo(StandardBaseModel):
2324
"""

src/guidellm/scheduler/scheduler.py

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
Any,
1313
Generic,
1414
Optional,
15-
Union,
15+
Union, Literal, cast,
1616
)
1717

1818
from loguru import logger
@@ -213,33 +213,48 @@ def _validate_scheduler_params(
213213
raise ValueError(f"Invalid max_number: {max_number}")
214214
if max_duration is not None and max_duration < 0:
215215
raise ValueError(f"Invalid max_duration: {max_duration}")
216-
if max_error_rate is not None and (max_error_rate < 0 or max_error_rate > 1):
216+
if max_error_rate is not None and (max_error_rate < 0):
217217
raise ValueError(f"Invalid max_error_rate: {max_error_rate}")
218218

219219
def _is_max_error_rate_reached(self, run_info: SchedulerRunInfo) -> bool:
220-
if run_info.max_error_rate is None:
220+
max_error = run_info.max_error_rate
221+
if max_error is None:
221222
return False
222223

223-
is_max_error_rate = run_info.max_error_rate < 1
224-
if not is_max_error_rate:
225-
# Constant value
226-
raise NotImplementedError()
227-
if(
224+
if not max_error < 1:
225+
# Absolute error count, i.e not a ratio
226+
logger.debug(
227+
f"Current error count "
228+
f"{run_info.errored_requests} / "
229+
f"{max_error} (max error)"
230+
)
231+
return max_error < run_info.errored_requests
232+
elif(
228233
run_info.strategy.type_ == "constant"
229234
and run_info.end_number != math.inf
230235
):
231-
# We know how many requests
232-
current_error_rate = run_info.errored_requests / run_info.end_number
236+
current_error_ratio = run_info.errored_requests / run_info.end_number
233237
logger.debug(
234-
f"Current error rate {current_error_rate} "
238+
f"Current error rate {current_error_ratio} "
235239
f"i.e total_finished [success / error] / max total possible"
236240
)
237-
return run_info.max_error_rate < current_error_rate
238-
elif settings.constant_error_check_window_size <= run_info.completed_requests:
239-
# Calculate deque ratio or success to erorr
240-
if run_info.last_requests_statuses is None:
241-
raise RuntimeError("")
242-
return
241+
return max_error < current_error_ratio
242+
elif settings.error_check_window_size <= run_info.completed_requests:
243+
last_requests_statuses = run_info.last_requests_statuses
244+
last_errored_requests_count = len([
245+
s
246+
for s
247+
in last_requests_statuses
248+
if s == "error"
249+
])
250+
current_error_ratio = last_errored_requests_count / len(last_requests_statuses)
251+
logger.debug(
252+
f"Current error rate in "
253+
f"last requests window is "
254+
f"{current_error_ratio} / {max_error} "
255+
f"(max error rate)"
256+
)
257+
return max_error < current_error_ratio
243258
return False
244259

245260
async def _start_processes(
@@ -323,12 +338,6 @@ def _run_setup(
323338
scheduling_strategy, max_duration, max_number
324339
)
325340

326-
if end_number == math.inf and max_error_rate is not None:
327-
logger.warning(
328-
"max_error_rate will be ignored "
329-
"because end_number can not be determined."
330-
)
331-
332341
if end_number == math.inf and end_time is None:
333342
logger.warning(
334343
"No end number or end time set, "
@@ -342,7 +351,9 @@ def _run_setup(
342351
processes=len(processes),
343352
strategy=scheduling_strategy,
344353
max_error_rate=max_error_rate,
345-
last_requests_statuses = collections.deque(maxlen=settings.constant_error_check_window_size) if max_error_rate > 1 else None
354+
last_requests_statuses=collections.deque(
355+
maxlen=settings.error_check_window_size
356+
)
346357
)
347358

348359
return info, requests_iter, times_iter
@@ -459,9 +470,11 @@ def _check_result_ready(
459470
if is_errored:
460471
run_info.errored_requests += 1
461472

462-
if run_info.last_requests_statuses:
463-
status = "error" if is_errored else "success"
464-
run_info.last_requests_statuses.append(status)
473+
request_status: Literal["error", "success"] = cast(
474+
Literal["error", "success"],
475+
"error" if is_errored else "success"
476+
)
477+
run_info.last_requests_statuses.append(request_status)
465478

466479
return SchedulerRequestResult(
467480
type_="request_complete",

0 commit comments

Comments
 (0)