
Commit d8858b6

Refactors to simplify and enable benchmark profiles
1 parent 4a789bc commit d8858b6

File tree

10 files changed: +973 -1065 lines


src/guidellm/benchmark/benchmarker.py

Lines changed: 37 additions & 112 deletions
@@ -5,151 +5,76 @@
 from pathlib import Path
 from typing import (
     Any,
+    Callable,
     Generic,
-    Literal,
     Optional,
     Union,
 )

-from pydantic import Field
 from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003

-from guidellm.backend import Backend, ResponseSummary
+from guidellm.backend import Backend
 from guidellm.benchmark.aggregator import (
     AggregatorT,
     BenchmarkT,
     GenerativeBenchmarkAggregator,
 )
 from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
 from guidellm.benchmark.profile import Profile
-from guidellm.objects import StandardBaseModel
 from guidellm.request import (
-    GenerationRequest,
     GenerativeRequestLoaderDescription,
-    RequestLoaderDescription,
 )
 from guidellm.scheduler import (
-    GenerativeRequestsWorker,
-    RequestsWorker,
+    BackendT,
+    Environment,
     RequestT,
     ResponseT,
-    Scheduler,
-    SchedulerRequestResult,
+    ScheduledRequestInfo,
+    SchedulerState,
+    SchedulerUpdateAction,
     SchedulingStrategy,
 )
+from guidellm.utils import ThreadSafeSingletonMixin

-__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+__all__ = ["Benchmarker", "GenerativeBenchmarker"]


-class BenchmarkerResult(
-    StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
-):
-    type_: Literal[
-        "run_start",
-        "run_complete",
-        "scheduler_start",
-        "scheduler_update",
-        "scheduler_complete",
-        "benchmark_compiled",
-    ]
-    start_time: float
-    end_number: int
-    profile: Profile
-    current_index: int
-    current_strategy: Optional[SchedulingStrategy] = None
-    current_aggregator: Optional[AggregatorT] = None
-    current_benchmark: Optional[BenchmarkT] = None
-    current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
-
-
-class BenchmarkerStrategyLimits(StandardBaseModel):
-    requests_loader_size: Optional[int] = Field(
-        description="Size of the request loader.",
-    )
-    max_number_per_strategy: Optional[int] = Field(
-        description="Maximum number of requests to process per strategy.",
-        ge=0,
-    )
-    max_duration_per_strategy: Optional[float] = Field(
-        description="Maximum duration (in seconds) to process requests per strategy.",
-        ge=0,
-    )
-    warmup_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for warmup.",
-        ge=0,
-        le=1,
-    )
-    cooldown_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for cooldown.",
-        ge=0,
-        le=1,
-    )
-
-    @property
-    def max_number(self) -> Optional[int]:
-        if self.max_number_per_strategy is not None:
-            return self.max_number_per_strategy
-
-        if self.requests_loader_size is not None:
-            return self.requests_loader_size
-
-        return None
-
-    @property
-    def max_duration(self) -> Optional[float]:
-        return self.max_duration_per_strategy
-
-    @property
-    def warmup_number(self) -> Optional[int]:
-        if self.warmup_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.warmup_percent_per_strategy * self.max_number)
-
-    @property
-    def warmup_duration(self) -> Optional[float]:
-        if self.warmup_percent_per_strategy is None or self.max_duration is None:
-            return None
-
-        return self.warmup_percent_per_strategy * self.max_duration
-
-    @property
-    def cooldown_number(self) -> Optional[int]:
-        if self.cooldown_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.cooldown_percent_per_strategy * self.max_number)
+"""
+Scheduler:
+    requests: Iterable[
+        Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]]
+    ],
+    backend: BackendT[RequestT, ResponseT],
+    strategy: SchedulingStrategy,
+    env: Environment,
+    **constraints: dict[
+        str, Union[int, float, str, ConstraintsResolveArgs, CallableConstraint]
+    ],

-    @property
-    def cooldown_duration(self) -> Optional[float]:
-        if self.cooldown_percent_per_strategy is None or self.max_duration is None:
-            return None
+CallableConstraint = Callable[
+    [SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction
+]
+"""

-        return self.cooldown_percent_per_strategy * self.max_duration

+CallableConstraintInitializer = Callable[
+    [AggregatorT, BenchmarkT],
+    Callable[[SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction],
+]

-class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
-    def __init__(
-        self,
-        worker: RequestsWorker[RequestT, ResponseT],
-        request_loader: Iterable[RequestT],
-        requests_loader_description: RequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-    ):
-        self.worker = worker
-        self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
-            worker=worker, request_loader=request_loader
-        )
-        self.requests_loader_description = requests_loader_description
-        self.benchmark_save_extras = benchmark_save_extras

+class Benchmarker(
+    Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC, ThreadSafeSingletonMixin
+):
     async def run(
         self,
+        requests: Iterable[
+            Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]]
+        ],
+        backend: BackendT[RequestT, ResponseT],
         profile: Profile,
-        max_number_per_strategy: Optional[int],
-        max_duration_per_strategy: Optional[float],
-        warmup_percent_per_strategy: Optional[float],
-        cooldown_percent_per_strategy: Optional[float],
+        environment: Environment,
+        aggregator: type[AggregatorT],
     ) -> AsyncGenerator[
         BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
     ]:
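
For orientation, the docstring added above sketches the interface this refactor moves toward: the per-strategy limits removed from Benchmarker.run (max_number_per_strategy, the warmup/cooldown percentages, and so on) are expected to be expressed as scheduler constraints instead. Below is a minimal, hypothetical sketch of a callable matching the CallableConstraint alias from that docstring; the SchedulerState attribute and the SchedulerUpdateAction constructor arguments used here are illustrative assumptions, not APIs confirmed by this commit.

# Hypothetical sketch of a callable constraint shaped like the
# CallableConstraint alias documented above:
#   Callable[[SchedulerState, ScheduledRequestInfo], SchedulerUpdateAction]
# The names imported from guidellm.scheduler appear in the diff; the state
# attribute and SchedulerUpdateAction keywords below are assumptions.
from guidellm.scheduler import (
    ScheduledRequestInfo,
    SchedulerState,
    SchedulerUpdateAction,
)


def max_requests_constraint(max_requests: int):
    """Build a constraint that asks the scheduler to stop once `max_requests`
    requests have completed, roughly replacing max_number_per_strategy."""

    def constraint(
        state: SchedulerState, request: ScheduledRequestInfo
    ) -> SchedulerUpdateAction:
        # `processed_requests`, `request_queuing`, and the no-argument
        # constructor are guesses; consult guidellm.scheduler for real names.
        if state.processed_requests >= max_requests:
            return SchedulerUpdateAction(request_queuing="stop")
        return SchedulerUpdateAction()

    return constraint


# Per the docstring, such a callable would be passed to the Scheduler as a
# keyword constraint, e.g. max_number=max_requests_constraint(1000).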
