Skip to content

Commit f11da24

Browse files
author
markvaykhansky
committed
WIP - Support more rate types
1 parent 9945710 commit f11da24

File tree

3 files changed

+41
-12
lines changed

3 files changed

+41
-12
lines changed

src/guidellm/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class Settings(BaseSettings):
114114
logging: LoggingSettings = LoggingSettings()
115115
default_sweep_number: int = 10
116116
shutdown_poll_interval_seconds: float = 10
117+
# Number of most recent request outcomes kept for the windowed error-rate check.
# Annotated like the neighbouring settings so it is declared as a typed field.
constant_error_check_window_size: int = 100
117118

118119
# HTTP settings
119120
request_follow_redirects: bool = True

src/guidellm/scheduler/result.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import deque
12
from typing import (
23
Generic,
34
Literal,
@@ -16,6 +17,8 @@
1617
]
1718

1819

20+
# Outcome label recorded for each finished request. Literal takes its allowed
# values as comma-separated arguments; "success" | "error" would apply the |
# operator to two str objects and raise TypeError when the module is imported.
RequestStatus = Literal["success", "error"]
21+
1922
class SchedulerRunInfo(StandardBaseModel):
2023
"""
2124
Information about the current run of the scheduler.
@@ -55,6 +58,8 @@ class SchedulerRunInfo(StandardBaseModel):
5558
completed_requests: int = 0
5659
errored_requests: int = 0
5760

61+
last_requests_statuses: Optional[deque[RequestStatus]] = None
62+
5863

5964
class SchedulerRequestInfo(StandardBaseModel):
6065
"""

src/guidellm/scheduler/scheduler.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import collections
12
from datetime import timedelta
23
import asyncio
34
import math
@@ -128,10 +129,11 @@ async def run(
128129
responses_queue,
129130
shutdown_event,
130131
) = await self._start_processes(
131-
manager, executor, scheduling_strategy, max_error_rate is not None
132+
manager, executor, scheduling_strategy
132133
)
133-
if shutdown_event and shutdown_event.is_set():
134+
if shutdown_event.is_set():
134135
raise RuntimeError("shutdown_event is set before starting scheduling")
136+
135137
run_info, requests_iter, times_iter = self._run_setup(
136138
futures, scheduling_strategy, max_number, max_duration, max_error_rate
137139
)
@@ -217,27 +219,42 @@ def _validate_scheduler_params(
217219
def _is_max_error_rate_reached(self, run_info: SchedulerRunInfo) -> bool:
218220
if run_info.max_error_rate is None:
219221
return False
220-
current_error_rate = run_info.errored_requests / run_info.end_number
221-
logger.debug(
222-
f"Current error rate {current_error_rate} "
223-
f"i.e total_finished [success / error] / max total possible"
224-
)
225-
return run_info.max_error_rate < current_error_rate
222+
223+
is_max_error_rate = run_info.max_error_rate < 1
224+
if not is_max_error_rate:
225+
# Constant value
226+
raise NotImplementedError()
227+
if(
228+
run_info.strategy.type_ == "constant"
229+
and run_info.end_number != math.inf
230+
):
231+
# We know how many requests
232+
current_error_rate = run_info.errored_requests / run_info.end_number
233+
logger.debug(
234+
f"Current error rate {current_error_rate} "
235+
f"i.e total_finished [success / error] / max total possible"
236+
)
237+
return run_info.max_error_rate < current_error_rate
238+
elif settings.constant_error_check_window_size <= run_info.completed_requests:
239+
# Calculate deque ratio of success to error
240+
if run_info.last_requests_statuses is None:
241+
raise RuntimeError("")
242+
return
243+
return False
226244

227245
async def _start_processes(
228246
self,
229247
manager,
230248
executor: ProcessPoolExecutor,
231249
scheduling_strategy: SchedulingStrategy,
232-
create_shutdown_event: bool = False,
233250
) -> tuple[
234251
list[asyncio.Future],
235252
multiprocessing.Queue,
236253
multiprocessing.Queue,
237-
Optional[MultiprocessingEvent],
254+
MultiprocessingEvent,
238255
]:
239256
await self.worker.prepare_multiprocessing()
240-
shutdown_event = manager.Event() if create_shutdown_event else None
257+
shutdown_event = manager.Event()
241258
requests_queue = manager.Queue(
242259
maxsize=scheduling_strategy.queued_requests_limit
243260
)
@@ -325,6 +342,7 @@ def _run_setup(
325342
processes=len(processes),
326343
strategy=scheduling_strategy,
327344
max_error_rate=max_error_rate,
345+
last_requests_statuses = collections.deque(maxlen=settings.constant_error_check_window_size) if max_error_rate > 1 else None
328346
)
329347

330348
return info, requests_iter, times_iter
@@ -437,9 +455,14 @@ def _check_result_ready(
437455
run_info.processing_requests -= 1
438456
run_info.completed_requests += 1
439457

440-
if process_response.info.errored:
458+
is_errored = process_response.info.errored
459+
if is_errored:
441460
run_info.errored_requests += 1
442461

462+
if run_info.last_requests_statuses:
463+
status = "error" if is_errored else "success"
464+
run_info.last_requests_statuses.append(status)
465+
443466
return SchedulerRequestResult(
444467
type_="request_complete",
445468
run_info=run_info,

0 commit comments

Comments
 (0)