"""Benchmarker: abstract orchestration of scheduling strategies over a profile."""

from abc import ABC
from collections.abc import AsyncIterator, Iterable
from typing import (
    Generic,
    Optional,
    Union,
)

from guidellm.benchmark.aggregator import (
    AggregatorT,
    BenchmarkT,
)
from guidellm.benchmark.profile import Profile
from guidellm.scheduler import (
    BackendT,
    Environment,
    RequestT,
    RequestTimingsT,
    ResponseT,
    Scheduler,
    SchedulerState,
    SchedulingStrategy,
)
from guidellm.utils import ThreadSafeSingletonMixin

# Public API of this module; GenerativeBenchmarker was removed in this change.
__all__ = ["Benchmarker"]
28
66
29
class Benchmarker(
    Generic[AggregatorT, BenchmarkT, RequestT, RequestTimingsT, ResponseT],
    ABC,
    ThreadSafeSingletonMixin,
):
    """Drive a benchmarking run: one scheduler pass per strategy in a profile.

    Abstract, thread-safe singleton (via ``ThreadSafeSingletonMixin``) that
    iterates the strategies produced by a :class:`Profile`, runs each through
    the :class:`Scheduler`, feeds every scheduler event into a per-strategy
    aggregator, and yields progress tuples to the caller.
    """

    async def run(
        self,
        # NOTE(review): this parameter's exact annotation sits in an elided
        # diff hunk; reconstructed from the Scheduler contract and the
        # `requests=requests` call below — confirm against the upstream file.
        requests: Iterable[
            Union[RequestT, Iterable[Union[RequestT, tuple[RequestT, float]]]]
        ],
        backend: BackendT[RequestT, ResponseT],
        profile: Profile,
        environment: Environment,
        aggregator_class: type[AggregatorT],
    ) -> AsyncIterator[
        tuple[
            Optional[BenchmarkT],
            AggregatorT,
            SchedulingStrategy,
            Optional[SchedulerState],
        ]
    ]:
        """Run every strategy in ``profile`` and stream progress updates.

        For each strategy the generator yields, in order:
        1. ``(None, aggregator, strategy, None)`` — strategy started;
        2. ``(None, aggregator, strategy, scheduler_state)`` — one per
           scheduler event, after the aggregator has been updated;
        3. ``(benchmark, aggregator, strategy, None)`` — compiled benchmark
           for the completed strategy.

        :param requests: Requests (or nested iterables of requests, optionally
            paired with a float) forwarded verbatim to the scheduler.
        :param backend: Backend the scheduler dispatches requests against.
        :param profile: Supplies the strategy/constraints sequence and receives
            each compiled benchmark back via ``generator.send(...)``.
        :param environment: Execution environment handed to the scheduler.
        :param aggregator_class: Concrete aggregator type; constructed once per
            strategy with ``strategy`` and ``constraints`` keyword arguments.
        """
        # `thread_lock` is presumably provided by ThreadSafeSingletonMixin —
        # it serializes concurrent run() calls on the singleton. Note the
        # generator yields while holding the lock, so the lock is held for
        # the consumer-driven lifetime of the iteration.
        with self.thread_lock:
            strategies_generator = profile.strategies_generator()
            strategy, constraints = next(strategies_generator)

            while strategy is not None:
                aggregator = aggregator_class(
                    strategy=strategy, constraints=constraints
                )
                # Signal the start of this strategy before any work happens.
                yield None, aggregator, strategy, None

                async for (
                    response,
                    request,
                    request_info,
                    scheduler_state,
                ) in Scheduler[BackendT, RequestT, RequestTimingsT, ResponseT].run(
                    requests=requests,
                    backend=backend,
                    strategy=strategy,
                    env=environment,
                    **constraints,
                ):
                    aggregator.update(
                        response=response,
                        request=request,
                        request_info=request_info,
                    )
                    # Intermediate progress: no benchmark yet, live state.
                    yield None, aggregator, strategy, scheduler_state

                benchmark = aggregator.compile()
                yield benchmark, aggregator, strategy, None
                # Feed the result back so the profile can pick (or stop
                # producing) the next strategy; returns (None, ...) when done.
                strategy, constraints = strategies_generator.send(
                    (benchmark, aggregator)
                )
0 commit comments