|
16 | 16 |
|
17 | 17 | from pydantic import Field, field_validator
|
18 | 18 |
|
19 |
| -from guidellm.config import settings |
20 | 19 | from guidellm.scheduler.objects import (
|
21 | 20 | ScheduledRequestInfo,
|
22 | 21 | SchedulerState,
|
23 | 22 | SchedulerUpdateAction,
|
24 | 23 | SchedulerUpdateActionProgress,
|
25 | 24 | )
|
| 25 | +from guidellm.settings import settings |
26 | 26 | from guidellm.utils import InfoMixin, RegistryMixin, StandardBaseModel
|
27 | 27 |
|
28 | 28 | __all__ = [
|
|
35 | 35 | "MaxGlobalErrorRateConstraint",
|
36 | 36 | "MaxNumberConstraint",
|
37 | 37 | "PydanticConstraintInitializer",
|
| 38 | + "RequestsExhaustedConstraint", |
38 | 39 | "SerializableConstraintInitializer",
|
39 | 40 | "UnserializableConstraintInitializer",
|
40 | 41 | ]
|
@@ -988,3 +989,47 @@ def _validate_max_error_rate(
|
988 | 989 | )
|
989 | 990 |
|
990 | 991 | return value[0] if isinstance(value, list) and len(value) == 1 else value
|
| 992 | + |
| 993 | + |
class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
    """
    Constraint that stops scheduling once a fixed budget of requests is used up.

    When called, the constraint compares the scheduler state's counters with
    ``num_requests``: request queuing is stopped once ``created_requests``
    reaches the budget, and local request processing is stopped once
    ``processed_requests`` reaches it.
    """

    type_: Literal["requests_exhausted"] = "requests_exhausted"  # type: ignore[assignment]
    # Total number of requests available; compared against the state counters.
    num_requests: int

    @property
    def info(self) -> dict[str, Any]:
        """
        Extract serializable information from this constraint.

        :return: Dictionary produced by ``model_dump()`` for this constraint
        """
        return self.model_dump()

    def __call__(
        self,
        state: SchedulerState,
        request_info: ScheduledRequestInfo,  # noqa: ARG002
    ) -> SchedulerUpdateAction:
        """
        Evaluate the constraint against the current scheduler state.

        :param state: Scheduler state providing ``created_requests`` and
            ``processed_requests``
        :param request_info: Unused; accepted to match the constraint call
            signature
        :return: Update action whose ``request_queuing`` is ``"stop"`` once
            enough requests were created, whose ``request_processing`` is
            ``"stop_local"`` once enough requests were processed, and which
            carries the remaining fraction and count as progress
        """
        create_exceeded = state.created_requests >= self.num_requests
        processed_exceeded = state.processed_requests >= self.num_requests

        if self.num_requests > 0:
            remaining_fraction = min(
                max(0.0, 1.0 - state.processed_requests / float(self.num_requests)),
                1.0,
            )
        else:
            # An empty (or non-positive) budget is exhausted from the start.
            # Report no remaining work instead of dividing by zero, which keeps
            # the progress consistent with the stop flags computed above.
            remaining_fraction = 0.0
        remaining_requests = max(0, self.num_requests - state.processed_requests)

        return SchedulerUpdateAction(
            request_queuing="stop" if create_exceeded else "continue",
            request_processing="stop_local" if processed_exceeded else "continue",
            metadata={
                "num_requests": self.num_requests,
                "create_exceeded": create_exceeded,
                "processed_exceeded": processed_exceeded,
                "created_requests": state.created_requests,
                "processed_requests": state.processed_requests,
                "remaining_fraction": remaining_fraction,
                "remaining_requests": remaining_requests,
            },
            progress=SchedulerUpdateActionProgress(
                remaining_fraction=remaining_fraction,
                remaining_requests=remaining_requests,
            ),
        )
0 commit comments