|
1 | 1 | """ |
2 | | -Standard constraint implementations. |
| 2 | +Error-based constraint implementations. |
3 | 3 |
|
4 | | -Provides standard constraint types for limiting benchmark execution based on |
5 | | -time, error rates, and request counts. |
| 4 | +Provides constraint types for limiting benchmark execution based on error rates |
| 5 | +and error counts. These constraints monitor request error status to determine |
| 6 | +when to stop benchmark execution due to excessive errors. |
6 | 7 | """ |
7 | 8 |
|
8 | 9 | from __future__ import annotations |
|
17 | 18 | SchedulerState, |
18 | 19 | SchedulerUpdateAction, |
19 | 20 | ) |
20 | | -from guidellm.schemas import RequestInfo, StandardBaseModel |
| 21 | +from guidellm.schemas import RequestInfo |
21 | 22 | from guidellm.settings import settings |
22 | | -from guidellm.utils import InfoMixin |
23 | 23 |
|
24 | 24 | from .constraint import Constraint, PydanticConstraintInitializer |
25 | 25 | from .factory import ConstraintsInitializerFactory |
26 | 26 |
|
27 | 27 | __all__ = [ |
28 | | - "MaxDurationConstraint", |
29 | 28 | "MaxErrorRateConstraint", |
30 | 29 | "MaxErrorsConstraint", |
31 | 30 | "MaxGlobalErrorRateConstraint", |
32 | | - "MaxNumberConstraint", |
33 | | - "RequestsExhaustedConstraint", |
34 | 31 | ] |
35 | 32 |
|
36 | 33 |
|
37 | | -@ConstraintsInitializerFactory.register( # type: ignore[arg-type] |
38 | | - ["max_number", "max_num", "max_requests", "max_req"] |
39 | | -) |
40 | | -class MaxNumberConstraint(PydanticConstraintInitializer): |
41 | | - """ |
42 | | - Constraint that limits execution based on maximum request counts. |
43 | | -
|
44 | | - Stops request queuing when created requests reach the limit and stops local |
45 | | - request processing when processed requests reach the limit. Provides progress |
46 | | - tracking based on remaining requests and completion fraction. |
47 | | - """ |
48 | | - |
49 | | - type_: Literal["max_number"] = "max_number" # type: ignore[assignment] |
50 | | - max_num: int | float | list[int | float] = Field( |
51 | | - description="Maximum number of requests allowed before triggering constraint", |
52 | | - ) |
53 | | - current_index: int = Field( |
54 | | - default=-1, description="Current index for list-based max_num values" |
55 | | - ) |
56 | | - |
57 | | - @classmethod |
58 | | - def validated_kwargs( |
59 | | - cls, max_num: int | float | list[int | float], **kwargs |
60 | | - ) -> dict[str, Any]: |
61 | | - """ |
62 | | - Validate and process arguments for MaxNumberConstraint creation. |
63 | | -
|
64 | | - :param max_num: Maximum number of requests to allow |
65 | | - :param kwargs: Supports max_num, max_number, max_requests, max_req, |
66 | | - and optional type_ |
67 | | - :return: Validated dictionary with max_num and type_ fields |
68 | | - """ |
69 | | - aliases = ["max_number", "max_num", "max_requests", "max_req"] |
70 | | - for alias in aliases: |
71 | | - if max_num is None: |
72 | | - max_num = kwargs.get(alias) |
73 | | - |
74 | | - return {"max_num": max_num, "current_index": kwargs.get("current_index", -1)} |
75 | | - |
76 | | - def create_constraint(self, **_kwargs) -> Constraint: |
77 | | - """ |
78 | | - Return self as the constraint instance. |
79 | | -
|
80 | | - :param kwargs: Additional keyword arguments (unused) |
81 | | - :return: Self instance as the constraint |
82 | | - """ |
83 | | - self.current_index += 1 |
84 | | - |
85 | | - return cast("Constraint", self.model_copy()) |
86 | | - |
87 | | - def __call__( |
88 | | - self, state: SchedulerState, request_info: RequestInfo |
89 | | - ) -> SchedulerUpdateAction: |
90 | | - """ |
91 | | - Evaluate constraint against current scheduler state and request count. |
92 | | -
|
93 | | - :param state: Current scheduler state with request counts |
94 | | - :param request_info: Individual request information (unused) |
95 | | - :return: Action indicating whether to continue or stop operations |
96 | | - """ |
97 | | - _ = request_info # Unused parameters |
98 | | - current_index = max(0, self.current_index) |
99 | | - max_num = ( |
100 | | - self.max_num |
101 | | - if isinstance(self.max_num, int | float) |
102 | | - else self.max_num[min(current_index, len(self.max_num) - 1)] |
103 | | - ) |
104 | | - |
105 | | - create_exceeded = state.created_requests >= max_num |
106 | | - processed_exceeded = state.processed_requests >= max_num |
107 | | - remaining_requests = min(max(0, max_num - state.processed_requests), max_num) |
108 | | - stop_time = ( |
109 | | - None if remaining_requests > 0 else request_info.completed_at or time.time() |
110 | | - ) |
111 | | - |
112 | | - return SchedulerUpdateAction( |
113 | | - request_queuing="stop" if create_exceeded else "continue", |
114 | | - request_processing="stop_local" if processed_exceeded else "continue", |
115 | | - metadata={ |
116 | | - "max_number": max_num, |
117 | | - "create_exceeded": create_exceeded, |
118 | | - "processed_exceeded": processed_exceeded, |
119 | | - "created_requests": state.created_requests, |
120 | | - "processed_requests": state.processed_requests, |
121 | | - "remaining_requests": remaining_requests, |
122 | | - "stop_time": stop_time, |
123 | | - }, |
124 | | - progress=SchedulerProgress( |
125 | | - remaining_requests=remaining_requests, |
126 | | - total_requests=max_num, |
127 | | - stop_time=stop_time, |
128 | | - ), |
129 | | - ) |
130 | | - |
131 | | - @field_validator("max_num") |
132 | | - @classmethod |
133 | | - def _validate_max_num( |
134 | | - cls, value: int | float | list[int | float] |
135 | | - ) -> int | float | list[int | float]: |
136 | | - if not isinstance(value, list): |
137 | | - value = [value] |
138 | | - for val in value: |
139 | | - if not val: |
140 | | - raise ValueError( |
141 | | - f"max_num must be set and truthful, received {value} ({val} failed)" |
142 | | - ) |
143 | | - if not isinstance(val, int | float) or val <= 0: |
144 | | - raise ValueError( |
145 | | - f"max_num must be a positive num, received {value} ({val} failed)" |
146 | | - ) |
147 | | - |
148 | | - return value[0] if isinstance(value, list) and len(value) == 1 else value |
149 | | - |
150 | | - |
151 | | -@ConstraintsInitializerFactory.register( |
152 | | - ["max_duration", "max_dur", "max_sec", "max_seconds", "max_min", "max_minutes"] |
153 | | -) |
154 | | -class MaxDurationConstraint(PydanticConstraintInitializer): |
155 | | - """ |
156 | | - Constraint that limits execution based on maximum time duration. |
157 | | -
|
158 | | - Stops both request queuing and processing when the elapsed time since scheduler |
159 | | - start exceeds the maximum duration. Provides progress tracking based on |
160 | | - remaining time and completion fraction. |
161 | | - """ |
162 | | - |
163 | | - type_: Literal["max_duration"] = "max_duration" # type: ignore[assignment] |
164 | | - max_duration: int | float | list[int | float] = Field( |
165 | | - description="Maximum duration in seconds before triggering constraint" |
166 | | - ) |
167 | | - current_index: int = Field(default=-1, description="Current index in duration list") |
168 | | - |
169 | | - @classmethod |
170 | | - def validated_kwargs( |
171 | | - cls, max_duration: int | float | list[int | float] | None = None, **kwargs |
172 | | - ) -> dict[str, Any]: |
173 | | - """ |
174 | | - Validate and process arguments for MaxDurationConstraint creation. |
175 | | -
|
176 | | - :param max_duration: Maximum duration in seconds |
177 | | - :param kwargs: Supports max_duration, max_dur, max_sec, max_seconds, |
178 | | - max_min, max_minutes, and optional type_ |
179 | | - :return: Validated dictionary with max_duration and type_ fields |
180 | | - """ |
181 | | - seconds_aliases = ["max_dur", "max_sec", "max_seconds"] |
182 | | - for alias in seconds_aliases: |
183 | | - if max_duration is None: |
184 | | - max_duration = kwargs.get(alias) |
185 | | - minutes_aliases = ["max_min", "max_minutes"] |
186 | | - for alias in minutes_aliases: |
187 | | - minutes = kwargs.get(alias) |
188 | | - if minutes is not None and max_duration is None: |
189 | | - max_duration = minutes * 60 |
190 | | - |
191 | | - return { |
192 | | - "max_duration": max_duration, |
193 | | - "current_index": kwargs.get("current_index", -1), |
194 | | - } |
195 | | - |
196 | | - def create_constraint(self, **_kwargs) -> Constraint: |
197 | | - """ |
198 | | - Return self as the constraint instance. |
199 | | -
|
200 | | - :param kwargs: Additional keyword arguments (unused) |
201 | | - :return: Self instance as the constraint |
202 | | - """ |
203 | | - self.current_index += 1 |
204 | | - |
205 | | - return cast("Constraint", self.model_copy()) |
206 | | - |
207 | | - def __call__( |
208 | | - self, state: SchedulerState, request_info: RequestInfo |
209 | | - ) -> SchedulerUpdateAction: |
210 | | - """ |
211 | | - Evaluate constraint against current scheduler state and elapsed time. |
212 | | -
|
213 | | - :param state: Current scheduler state with start time |
214 | | - :param request_info: Individual request information (unused) |
215 | | - :return: Action indicating whether to continue or stop operations |
216 | | - """ |
217 | | - _ = request_info # Unused parameters |
218 | | - current_index = max(0, self.current_index) |
219 | | - max_duration = ( |
220 | | - self.max_duration |
221 | | - if isinstance(self.max_duration, int | float) |
222 | | - else self.max_duration[min(current_index, len(self.max_duration) - 1)] |
223 | | - ) |
224 | | - |
225 | | - current_time = time.time() |
226 | | - elapsed = current_time - state.start_time |
227 | | - duration_exceeded = elapsed >= max_duration |
228 | | - remaining_duration = min(max(0.0, max_duration - elapsed), max_duration) |
229 | | - stop_time = None if not duration_exceeded else state.start_time + max_duration |
230 | | - |
231 | | - return SchedulerUpdateAction( |
232 | | - request_queuing="stop" if duration_exceeded else "continue", |
233 | | - request_processing="stop_local" if duration_exceeded else "continue", |
234 | | - metadata={ |
235 | | - "max_duration": max_duration, |
236 | | - "elapsed_time": elapsed, |
237 | | - "duration_exceeded": duration_exceeded, |
238 | | - "start_time": state.start_time, |
239 | | - "current_time": current_time, |
240 | | - "stop_time": stop_time, |
241 | | - }, |
242 | | - progress=SchedulerProgress( |
243 | | - remaining_duration=remaining_duration, |
244 | | - total_duration=max_duration, |
245 | | - stop_time=stop_time, |
246 | | - ), |
247 | | - ) |
248 | | - |
249 | | - @field_validator("max_duration") |
250 | | - @classmethod |
251 | | - def _validate_max_duration( |
252 | | - cls, value: int | float | list[int | float] |
253 | | - ) -> int | float | list[int | float]: |
254 | | - if not isinstance(value, list): |
255 | | - value = [value] |
256 | | - for val in value: |
257 | | - if not val: |
258 | | - raise ValueError( |
259 | | - "max_duration must be set and truthful, " |
260 | | - f"received {value} ({val} failed)" |
261 | | - ) |
262 | | - if not isinstance(val, int | float) or val <= 0: |
263 | | - raise ValueError( |
264 | | - "max_duration must be a positive num," |
265 | | - f"received {value} ({val} failed)" |
266 | | - ) |
267 | | - |
268 | | - return value[0] if isinstance(value, list) and len(value) == 1 else value |
269 | | - |
270 | | - |
271 | 34 | @ConstraintsInitializerFactory.register( |
272 | 35 | ["max_errors", "max_err", "max_error", "max_errs"] |
273 | 36 | ) |
@@ -646,47 +409,3 @@ def _validate_max_error_rate( |
646 | 409 | ) |
647 | 410 |
|
648 | 411 | return value[0] if isinstance(value, list) and len(value) == 1 else value |
649 | | - |
650 | | - |
651 | | -class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin): |
652 | | - type_: Literal["requests_exhausted"] = "requests_exhausted" # type: ignore[assignment] |
653 | | - num_requests: int |
654 | | - |
655 | | - @property |
656 | | - def info(self) -> dict[str, Any]: |
657 | | - """ |
658 | | - Extract serializable information from this constraint initializer. |
659 | | -
|
660 | | - :return: Dictionary containing constraint configuration and metadata |
661 | | - """ |
662 | | - return self.model_dump() |
663 | | - |
664 | | - def __call__( |
665 | | - self, state: SchedulerState, request: RequestInfo |
666 | | - ) -> SchedulerUpdateAction: |
667 | | - _ = request # Unused parameter |
668 | | - create_exceeded = state.created_requests >= self.num_requests |
669 | | - processed_exceeded = state.processed_requests >= self.num_requests |
670 | | - remaining_requests = max(0, self.num_requests - state.processed_requests) |
671 | | - stop_time = ( |
672 | | - None if remaining_requests > 0 else request.completed_at or time.time() |
673 | | - ) |
674 | | - |
675 | | - return SchedulerUpdateAction( |
676 | | - request_queuing="stop" if create_exceeded else "continue", |
677 | | - request_processing="stop_local" if processed_exceeded else "continue", |
678 | | - metadata={ |
679 | | - "num_requests": self.num_requests, |
680 | | - "create_exceeded": create_exceeded, |
681 | | - "processed_exceeded": processed_exceeded, |
682 | | - "created_requests": state.created_requests, |
683 | | - "processed_requests": state.processed_requests, |
684 | | - "remaining_requests": remaining_requests, |
685 | | - "stop_time": stop_time, |
686 | | - }, |
687 | | - progress=SchedulerProgress( |
688 | | - remaining_requests=remaining_requests, |
689 | | - total_requests=self.num_requests, |
690 | | - stop_time=stop_time, |
691 | | - ), |
692 | | - ) |
0 commit comments