|
5 | 5 | from pathlib import Path
|
6 | 6 | from typing import (
|
7 | 7 | Any,
|
| 8 | + Callable, |
8 | 9 | Generic,
|
9 |
| - Literal, |
10 | 10 | Optional,
|
11 | 11 | Union,
|
12 | 12 | )
|
13 | 13 |
|
14 |
| -from pydantic import Field |
15 | 14 | from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003
|
16 | 15 |
|
17 |
| -from guidellm.backend import Backend, ResponseSummary |
| 16 | +from guidellm.backend import Backend |
18 | 17 | from guidellm.benchmark.aggregator import (
|
19 | 18 | AggregatorT,
|
20 | 19 | BenchmarkT,
|
21 | 20 | GenerativeBenchmarkAggregator,
|
22 | 21 | )
|
23 | 22 | from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
|
24 | 23 | from guidellm.benchmark.profile import Profile
|
25 |
| -from guidellm.objects import StandardBaseModel |
26 | 24 | from guidellm.request import (
|
27 |
| - GenerationRequest, |
28 | 25 | GenerativeRequestLoaderDescription,
|
29 |
| - RequestLoaderDescription, |
30 | 26 | )
|
31 | 27 | from guidellm.scheduler import (
|
32 |
| - GenerativeRequestsWorker, |
33 |
| - RequestsWorker, |
| 28 | + BackendT, |
| 29 | + Environment, |
34 | 30 | RequestT,
|
35 | 31 | ResponseT,
|
36 |
| - Scheduler, |
37 |
| - SchedulerRequestResult, |
| 32 | + ScheduledRequestInfo, |
| 33 | + SchedulerState, |
| 34 | + SchedulerUpdateAction, |
38 | 35 | SchedulingStrategy,
|
39 | 36 | )
|
| 37 | +from guidellm.utils import ThreadSafeSingletonMixin |
40 | 38 |
|
41 |
| -__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"] |
| 39 | +__all__ = ["Benchmarker", "GenerativeBenchmarker"] |
42 | 40 |
|
43 | 41 |
|
44 |
| -class BenchmarkerResult( |
45 |
| - StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT] |
46 |
| -): |
47 |
| - type_: Literal[ |
48 |
| - "run_start", |
49 |
| - "run_complete", |
50 |
| - "scheduler_start", |
51 |
| - "scheduler_update", |
52 |
| - "scheduler_complete", |
53 |
| - "benchmark_compiled", |
54 |
| - ] |
55 |
| - start_time: float |
56 |
| - end_number: int |
57 |
| - profile: Profile |
58 |
| - current_index: int |
59 |
| - current_strategy: Optional[SchedulingStrategy] = None |
60 |
| - current_aggregator: Optional[AggregatorT] = None |
61 |
| - current_benchmark: Optional[BenchmarkT] = None |
62 |
| - current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None |
63 |
| - |
64 |
| - |
65 |
| -class BenchmarkerStrategyLimits(StandardBaseModel): |
66 |
| - requests_loader_size: Optional[int] = Field( |
67 |
| - description="Size of the request loader.", |
68 |
| - ) |
69 |
| - max_number_per_strategy: Optional[int] = Field( |
70 |
| - description="Maximum number of requests to process per strategy.", |
71 |
| - ge=0, |
72 |
| - ) |
73 |
| - max_duration_per_strategy: Optional[float] = Field( |
74 |
| - description="Maximum duration (in seconds) to process requests per strategy.", |
75 |
| - ge=0, |
76 |
| - ) |
77 |
| - warmup_percent_per_strategy: Optional[float] = Field( |
78 |
| - description="Percentage of requests to use for warmup.", |
79 |
| - ge=0, |
80 |
| - le=1, |
81 |
| - ) |
82 |
| - cooldown_percent_per_strategy: Optional[float] = Field( |
83 |
| - description="Percentage of requests to use for cooldown.", |
84 |
| - ge=0, |
85 |
| - le=1, |
86 |
| - ) |
87 |
| - |
88 |
| - @property |
89 |
| - def max_number(self) -> Optional[int]: |
90 |
| - if self.max_number_per_strategy is not None: |
91 |
| - return self.max_number_per_strategy |
92 |
| - |
93 |
| - if self.requests_loader_size is not None: |
94 |
| - return self.requests_loader_size |
95 |
| - |
96 |
| - return None |
97 |
| - |
98 |
| - @property |
99 |
| - def max_duration(self) -> Optional[float]: |
100 |
| - return self.max_duration_per_strategy |
101 |
| - |
102 |
| - @property |
103 |
| - def warmup_number(self) -> Optional[int]: |
104 |
| - if self.warmup_percent_per_strategy is None or self.max_number is None: |
105 |
| - return None |
106 |
| - |
107 |
| - return int(self.warmup_percent_per_strategy * self.max_number) |
108 |
| - |
109 |
| - @property |
110 |
| - def warmup_duration(self) -> Optional[float]: |
111 |
| - if self.warmup_percent_per_strategy is None or self.max_duration is None: |
112 |
| - return None |
113 |
| - |
114 |
| - return self.warmup_percent_per_strategy * self.max_duration |
115 |
| - |
116 |
| - @property |
117 |
| - def cooldown_number(self) -> Optional[int]: |
118 |
| - if self.cooldown_percent_per_strategy is None or self.max_number is None: |
119 |
| - return None |
120 |
| - |
121 |
| - return int(self.cooldown_percent_per_strategy * self.max_number) |
| 42 | +""" |
| 43 | +Scheduler: |
| 44 | + requests: Iterable[ |
| 45 | + Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]] |
| 46 | + ], |
| 47 | + backend: BackendT[RequestT, ResponseT], |
| 48 | + strategy: SchedulingStrategy, |
| 49 | + env: Environment, |
| 50 | + **constraints: dict[ |
| 51 | + str, Union[int, float, str, ConstraintsResolveArgs, CallableConstraint] |
| 52 | + ], |
122 | 53 |
|
123 |
| - @property |
124 |
| - def cooldown_duration(self) -> Optional[float]: |
125 |
| - if self.cooldown_percent_per_strategy is None or self.max_duration is None: |
126 |
| - return None |
| 54 | +CallableConstraint = Callable[ |
| 55 | + [SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction |
| 56 | +] |
| 57 | +""" |
127 | 58 |
|
128 |
| - return self.cooldown_percent_per_strategy * self.max_duration |
129 | 59 |
|
| 60 | +CallableConstraintInitializer = Callable[ |
| 61 | + [AggregatorT, BenchmarkT], |
| 62 | + Callable[[SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction], |
| 63 | +] |
130 | 64 |
|
131 |
| -class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC): |
132 |
| - def __init__( |
133 |
| - self, |
134 |
| - worker: RequestsWorker[RequestT, ResponseT], |
135 |
| - request_loader: Iterable[RequestT], |
136 |
| - requests_loader_description: RequestLoaderDescription, |
137 |
| - benchmark_save_extras: Optional[dict[str, Any]] = None, |
138 |
| - ): |
139 |
| - self.worker = worker |
140 |
| - self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler( |
141 |
| - worker=worker, request_loader=request_loader |
142 |
| - ) |
143 |
| - self.requests_loader_description = requests_loader_description |
144 |
| - self.benchmark_save_extras = benchmark_save_extras |
145 | 65 |
|
| 66 | +class Benchmarker( |
| 67 | + Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC, ThreadSafeSingletonMixin |
| 68 | +): |
146 | 69 | async def run(
|
147 | 70 | self,
|
| 71 | + requests: Iterable[ |
| 72 | + Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]] |
| 73 | + ], |
| 74 | + backend: BackendT[RequestT, ResponseT], |
148 | 75 | profile: Profile,
|
149 |
| - max_number_per_strategy: Optional[int], |
150 |
| - max_duration_per_strategy: Optional[float], |
151 |
| - warmup_percent_per_strategy: Optional[float], |
152 |
| - cooldown_percent_per_strategy: Optional[float], |
| 76 | + environment: Environment, |
| 77 | + aggregator: type[AggregatorT], |
153 | 78 | ) -> AsyncGenerator[
|
154 | 79 | BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
|
155 | 80 | ]:
|
|
0 commit comments