Skip to content

Commit 414febe

Browse files
committed
Updates to remove the measured request timings generic and replace it with pydantic polymorphism
1 parent 3509a7b commit 414febe

27 files changed

+427
-238
lines changed

research/multiprocesssing_communication_perf/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def create_test_objects(
154154
GenerationRequest(
155155
content=generate_str(objects_size),
156156
),
157-
ScheduledRequestInfo[GenerationRequestTimings](
157+
ScheduledRequestInfo(
158158
scheduler_timings=RequestSchedulerTimings(
159159
targeted_start=time.time(),
160160
queued=time.time(),
@@ -173,7 +173,7 @@ def create_test_objects(
173173
),
174174
)
175175
for _ in range(num_objects)
176-
], [GenerationRequest, ScheduledRequestInfo[GenerationRequestTimings]]
176+
], [GenerationRequest, ScheduledRequestInfo]
177177

178178
if type_ == "tuple[GenerationResponse]":
179179
return [
@@ -186,7 +186,7 @@ def create_test_objects(
186186
GenerationRequest(
187187
content=generate_str(objects_size // 2),
188188
),
189-
ScheduledRequestInfo[GenerationRequestTimings](
189+
ScheduledRequestInfo(
190190
scheduler_timings=RequestSchedulerTimings(
191191
targeted_start=time.time(),
192192
queued=time.time(),
@@ -208,7 +208,7 @@ def create_test_objects(
208208
], [
209209
GenerationResponse,
210210
GenerationRequest,
211-
ScheduledRequestInfo[GenerationRequestTimings],
211+
ScheduledRequestInfo,
212212
]
213213

214214
raise ValueError(f"Unknown type_: {type_}")

src/guidellm/backend/backend.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
from guidellm.backend.objects import (
2020
GenerationRequest,
21-
GenerationRequestTimings,
2221
GenerationResponse,
2322
)
2423
from guidellm.scheduler import BackendInterface
@@ -35,7 +34,7 @@
3534

3635
class Backend(
3736
RegistryMixin["type[Backend]"],
38-
BackendInterface[GenerationRequest, GenerationRequestTimings, GenerationResponse],
37+
BackendInterface[GenerationRequest, GenerationResponse],
3938
):
4039
"""
4140
Base class for generative AI backends with registry and lifecycle.

src/guidellm/backend/objects.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111

1212
from pydantic import Field
1313

14-
from guidellm.scheduler import MeasuredRequestTimings
14+
from guidellm.scheduler import (
15+
MeasuredRequestTimings,
16+
SchedulerMessagingPydanticRegistry,
17+
)
1518
from guidellm.utils import StandardBaseModel
1619

1720
__all__ = [
@@ -21,6 +24,7 @@
2124
]
2225

2326

27+
@SchedulerMessagingPydanticRegistry.register()
2428
class GenerationRequest(StandardBaseModel):
2529
"""Request model for backend generation operations."""
2630

@@ -59,6 +63,7 @@ class GenerationRequest(StandardBaseModel):
5963
)
6064

6165

66+
@SchedulerMessagingPydanticRegistry.register()
6267
class GenerationResponse(StandardBaseModel):
6368
"""Response model for backend generation operations."""
6469

@@ -135,9 +140,11 @@ def preferred_output_tokens(
135140
return self.response_output_tokens or self.request_output_tokens
136141

137142

143+
@MeasuredRequestTimings.register()
138144
class GenerationRequestTimings(MeasuredRequestTimings):
139145
"""Timing model for tracking generation request lifecycle events."""
140146

147+
timings_type: Literal["generation_request_timings"] = "generation_request_timings"
141148
first_iteration: Optional[float] = Field(
142149
default=None,
143150
description="Unix timestamp when the first generation iteration began.",
@@ -146,3 +153,6 @@ class GenerationRequestTimings(MeasuredRequestTimings):
146153
default=None,
147154
description="Unix timestamp when the last generation iteration completed.",
148155
)
156+
157+
158+
SchedulerMessagingPydanticRegistry.register_decorator(GenerationRequestTimings)

src/guidellm/backend/openai.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,9 @@ async def default_model(self) -> Optional[str]:
279279
async def resolve(
280280
self,
281281
request: GenerationRequest,
282-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
282+
request_info: ScheduledRequestInfo,
283283
history: Optional[list[tuple[GenerationRequest, GenerationResponse]]] = None,
284-
) -> AsyncIterator[
285-
tuple[GenerationResponse, ScheduledRequestInfo[GenerationRequestTimings]]
286-
]:
284+
) -> AsyncIterator[tuple[GenerationResponse, ScheduledRequestInfo]]:
287285
"""
288286
Process a generation request and yield progressive responses.
289287

src/guidellm/benchmark/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@
4040
BenchmarkerProgressGroup,
4141
GenerativeConsoleBenchmarkerProgress,
4242
)
43-
from .scheduler_registry import scheduler_register_benchmark_objects
44-
45-
scheduler_register_benchmark_objects()
4643

4744
__all__ = [
4845
"Aggregator",

src/guidellm/benchmark/aggregator.py

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838

3939
from guidellm.backend import (
4040
GenerationRequest,
41-
GenerationRequestTimings,
4241
GenerationResponse,
4342
)
4443
from guidellm.benchmark.objects import (
@@ -47,7 +46,6 @@
4746
GenerativeRequestStats,
4847
)
4948
from guidellm.scheduler import (
50-
MeasuredRequestTimingsT,
5149
RequestT,
5250
ResponseT,
5351
ScheduledRequestInfo,
@@ -153,7 +151,7 @@ def get_metric(
153151

154152

155153
@runtime_checkable
156-
class Aggregator(Protocol[ResponseT, RequestT, MeasuredRequestTimingsT]):
154+
class Aggregator(Protocol[ResponseT, RequestT]):
157155
"""
158156
Protocol for processing benchmark data updates during execution.
159157
@@ -167,7 +165,7 @@ def __call__(
167165
state: AggregatorState,
168166
response: ResponseT | None,
169167
request: RequestT,
170-
request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
168+
request_info: ScheduledRequestInfo,
171169
scheduler_state: SchedulerState,
172170
) -> dict[str, Any] | None:
173171
"""
@@ -183,7 +181,7 @@ def __call__(
183181

184182

185183
@runtime_checkable
186-
class CompilableAggregator(Protocol[ResponseT, RequestT, MeasuredRequestTimingsT]):
184+
class CompilableAggregator(Protocol[ResponseT, RequestT]):
187185
"""
188186
Protocol for aggregators that compile final results from aggregated state.
189187
@@ -196,7 +194,7 @@ def __call__(
196194
state: AggregatorState,
197195
response: ResponseT | None,
198196
request: RequestT,
199-
request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
197+
request_info: ScheduledRequestInfo,
200198
scheduler_state: SchedulerState,
201199
) -> dict[str, Any] | None:
202200
"""
@@ -225,7 +223,7 @@ def compile(
225223
class SerializableAggregator(
226224
PydanticClassRegistryMixin[type["SerializableAggregator"]],
227225
ABC,
228-
Generic[ResponseT, RequestT, MeasuredRequestTimingsT],
226+
Generic[ResponseT, RequestT],
229227
):
230228
schema_discriminator: ClassVar[str] = "type_"
231229

@@ -286,7 +284,7 @@ def __call__(
286284
state: AggregatorState,
287285
response: ResponseT | None,
288286
request: RequestT,
289-
request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
287+
request_info: ScheduledRequestInfo,
290288
scheduler_state: SchedulerState,
291289
) -> dict[str, Any] | None:
292290
"""
@@ -314,9 +312,7 @@ def compile(
314312

315313

316314
@SerializableAggregator.register("inject_extras")
317-
class InjectExtrasAggregator(
318-
SerializableAggregator[ResponseT, RequestT, MeasuredRequestTimingsT], InfoMixin
319-
):
315+
class InjectExtrasAggregator(SerializableAggregator[ResponseT, RequestT], InfoMixin):
320316
"""
321317
Aggregator for injecting extra metadata into the output.
322318
"""
@@ -333,7 +329,7 @@ def __call__(
333329
state: AggregatorState,
334330
response: ResponseT | None,
335331
request: RequestT,
336-
request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
332+
request_info: ScheduledRequestInfo,
337333
scheduler_state: SchedulerState,
338334
) -> dict[str, Any] | None:
339335
"""
@@ -355,9 +351,7 @@ def compile(
355351

356352

357353
@SerializableAggregator.register("scheduler_stats")
358-
class SchedulerStatsAggregator(
359-
SerializableAggregator[ResponseT, RequestT, MeasuredRequestTimingsT], InfoMixin
360-
):
354+
class SchedulerStatsAggregator(SerializableAggregator[ResponseT, RequestT], InfoMixin):
361355
"""
362356
Aggregates scheduler timing and performance metrics.
363357
@@ -376,7 +370,7 @@ def __call__(
376370
state: AggregatorState,
377371
response: ResponseT | None,
378372
request: RequestT,
379-
request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
373+
request_info: ScheduledRequestInfo,
380374
scheduler_state: SchedulerState,
381375
) -> dict[str, Any] | None:
382376
"""
@@ -499,9 +493,7 @@ def compile(
499493

500494
@SerializableAggregator.register("generative_stats_progress")
501495
class GenerativeStatsProgressAggregator(
502-
SerializableAggregator[
503-
GenerationResponse, GenerationRequest, GenerationRequestTimings
504-
]
496+
SerializableAggregator[GenerationResponse, GenerationRequest]
505497
):
506498
"""
507499
Tracks generative model metrics during benchmark execution.
@@ -523,7 +515,7 @@ def __call__(
523515
state: AggregatorState,
524516
response: GenerationResponse | None,
525517
request: GenerationRequest,
526-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
518+
request_info: ScheduledRequestInfo,
527519
scheduler_state: SchedulerState,
528520
) -> dict[str, Any] | None:
529521
"""
@@ -667,9 +659,7 @@ def compile(
667659

668660
@SerializableAggregator.register("generative_requests")
669661
class GenerativeRequestsAggregator(
670-
SerializableAggregator[
671-
GenerationResponse, GenerationRequest, GenerationRequestTimings
672-
],
662+
SerializableAggregator[GenerationResponse, GenerationRequest],
673663
):
674664
"""
675665
Compiles complete generative benchmark results with warmup/cooldown filtering.
@@ -712,7 +702,7 @@ def __call__(
712702
state: AggregatorState,
713703
response: GenerationResponse | None,
714704
request: GenerationRequest,
715-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
705+
request_info: ScheduledRequestInfo,
716706
scheduler_state: SchedulerState,
717707
) -> dict[str, Any] | None:
718708
"""
@@ -875,7 +865,7 @@ def compile(
875865

876866
def _is_in_warmup(
877867
self,
878-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
868+
request_info: ScheduledRequestInfo,
879869
scheduler_state: SchedulerState,
880870
) -> bool:
881871
"""Check if the current request is within the warmup period."""
@@ -902,7 +892,7 @@ def _is_in_warmup(
902892

903893
def _is_in_cooldown(
904894
self,
905-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
895+
request_info: ScheduledRequestInfo,
906896
scheduler_state: SchedulerState,
907897
) -> bool:
908898
"""Check if the current request is within the cooldown period."""
@@ -936,7 +926,7 @@ def _create_generative_request_stats(
936926
cls,
937927
response: GenerationResponse,
938928
request: GenerationRequest,
939-
request_info: ScheduledRequestInfo[GenerationRequestTimings],
929+
request_info: ScheduledRequestInfo,
940930
) -> GenerativeRequestStats:
941931
prompt_tokens = response.preferred_prompt_tokens(
942932
settings.preferred_prompt_tokens_source

src/guidellm/benchmark/benchmarker.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
BackendInterface,
3737
Constraint,
3838
Environment,
39-
MeasuredRequestTimingsT,
4039
NonDistributedEnvironment,
4140
RequestT,
4241
ResponseT,
@@ -51,7 +50,7 @@
5150

5251

5352
class Benchmarker(
54-
Generic[BenchmarkT, RequestT, MeasuredRequestTimingsT, ResponseT],
53+
Generic[BenchmarkT, RequestT, ResponseT],
5554
ABC,
5655
ThreadSafeSingletonMixin,
5756
):
@@ -69,13 +68,12 @@ class Benchmarker(
6968
async def run(
7069
self,
7170
requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
72-
backend: BackendInterface[RequestT, MeasuredRequestTimingsT, ResponseT],
71+
backend: BackendInterface[RequestT, ResponseT],
7372
profile: Profile,
7473
benchmark_class: type[BenchmarkT],
7574
benchmark_aggregators: dict[
7675
str,
77-
Aggregator[ResponseT, RequestT, MeasuredRequestTimingsT]
78-
| CompilableAggregator[ResponseT, RequestT, MeasuredRequestTimingsT],
76+
Aggregator[ResponseT, RequestT] | CompilableAggregator[ResponseT, RequestT],
7977
],
8078
environment: Environment | None = None,
8179
) -> AsyncIterator[
@@ -121,7 +119,7 @@ async def run(
121119
request,
122120
request_info,
123121
scheduler_state,
124-
) in Scheduler[RequestT, MeasuredRequestTimingsT, ResponseT]().run(
122+
) in Scheduler[RequestT, ResponseT]().run(
125123
requests=requests,
126124
backend=backend,
127125
strategy=strategy,
@@ -170,12 +168,11 @@ def _compile_benchmark_kwargs(
170168
run_index: int,
171169
profile: Profile,
172170
requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
173-
backend: BackendInterface[RequestT, MeasuredRequestTimingsT, ResponseT],
171+
backend: BackendInterface[RequestT, ResponseT],
174172
environment: Environment,
175173
aggregators: dict[
176174
str,
177-
Aggregator[ResponseT, RequestT, MeasuredRequestTimingsT]
178-
| CompilableAggregator[ResponseT, RequestT, MeasuredRequestTimingsT],
175+
Aggregator[ResponseT, RequestT] | CompilableAggregator[ResponseT, RequestT],
179176
],
180177
aggregators_state: dict[str, dict[str, Any]],
181178
strategy: SchedulingStrategy,

src/guidellm/benchmark/entrypoints.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
Backend,
1414
BackendType,
1515
GenerationRequest,
16-
GenerationRequestTimings,
1716
GenerationResponse,
1817
)
1918
from guidellm.benchmark.aggregator import (
@@ -266,7 +265,6 @@ async def benchmark_generative_text( # noqa: C901
266265
Benchmarker[
267266
GenerativeBenchmark,
268267
GenerationRequest,
269-
GenerationRequestTimings,
270268
GenerationResponse,
271269
]().run(
272270
requests=request_loader,

src/guidellm/benchmark/objects.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import yaml
3232
from pydantic import Field, computed_field
3333

34-
from guidellm.backend import GenerationRequestTimings
3534
from guidellm.benchmark.profile import (
3635
Profile,
3736
)
@@ -134,7 +133,7 @@ class BenchmarkMetrics(StandardBaseDict):
134133
class BenchmarkRequestStats(StandardBaseDict):
135134
"""Individual request processing statistics and scheduling metadata."""
136135

137-
scheduler_info: ScheduledRequestInfo[GenerationRequestTimings] = Field(
136+
scheduler_info: ScheduledRequestInfo = Field(
138137
description="Scheduler metadata and timing information for the request"
139138
)
140139

0 commit comments

Comments (0)