diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index a4676c7e..76324a65 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -1,19 +1,31 @@ -from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator -from .benchmark import ( +from .aggregator import ( + Aggregator, + AggregatorState, + CompilableAggregator, + GenerativeRequestsAggregator, + GenerativeStatsProgressAggregator, + InjectExtrasAggregator, + SchedulerStatsAggregator, + SerializableAggregator, +) +from .benchmarker import Benchmarker +from .entrypoints import benchmark_generative_text, reimport_benchmarks_report +from .objects import ( Benchmark, - BenchmarkArgs, BenchmarkMetrics, - BenchmarkRunStats, + BenchmarkSchedulerStats, BenchmarkT, GenerativeBenchmark, + GenerativeBenchmarksReport, GenerativeMetrics, - GenerativeTextErrorStats, - GenerativeTextResponseStats, - StatusBreakdown, + GenerativeRequestStats, +) +from .output import ( + GenerativeBenchmarkerConsole, + GenerativeBenchmarkerCSV, + GenerativeBenchmarkerHTML, + GenerativeBenchmarkerOutput, ) -from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker -from .entrypoints import benchmark_generative_text, reimport_benchmarks_report -from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport from .profile import ( AsyncProfile, ConcurrentProfile, @@ -22,46 +34,45 @@ SweepProfile, SynchronousProfile, ThroughputProfile, - create_profile, ) from .progress import ( - BenchmarkerProgressDisplay, - BenchmarkerTaskProgressState, - GenerativeTextBenchmarkerProgressDisplay, - GenerativeTextBenchmarkerTaskProgressState, + BenchmarkerProgress, + BenchmarkerProgressGroup, + GenerativeConsoleBenchmarkerProgress, ) __all__ = [ - "AggregatorT", + "Aggregator", + "AggregatorState", "AsyncProfile", "Benchmark", - "BenchmarkAggregator", - "BenchmarkArgs", "BenchmarkMetrics", - "BenchmarkRunStats", + "BenchmarkSchedulerStats", "BenchmarkT", "Benchmarker", - "BenchmarkerProgressDisplay", - "BenchmarkerResult", - "BenchmarkerTaskProgressState", + "BenchmarkerProgress", + "BenchmarkerProgressGroup", + "CompilableAggregator", "ConcurrentProfile", "GenerativeBenchmark", - "GenerativeBenchmarkAggregator", - "GenerativeBenchmarker", - "GenerativeBenchmarksConsole", + "GenerativeBenchmarkerCSV", + "GenerativeBenchmarkerConsole", + "GenerativeBenchmarkerHTML", + "GenerativeBenchmarkerOutput", "GenerativeBenchmarksReport", + "GenerativeConsoleBenchmarkerProgress", "GenerativeMetrics", - "GenerativeTextBenchmarkerProgressDisplay", - "GenerativeTextBenchmarkerTaskProgressState", - "GenerativeTextErrorStats", - "GenerativeTextResponseStats", + "GenerativeRequestStats", + "GenerativeRequestsAggregator", + "GenerativeStatsProgressAggregator", + "InjectExtrasAggregator", "Profile", "ProfileType", - "StatusBreakdown", + "SchedulerStatsAggregator", + "SerializableAggregator", "SweepProfile", "SynchronousProfile", "ThroughputProfile", "benchmark_generative_text", - "create_profile", "reimport_benchmarks_report", ] diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py index 9e6ffd68..9db93a12 100644 --- a/src/guidellm/benchmark/aggregator.py +++ b/src/guidellm/benchmark/aggregator.py @@ -1,760 +1,1260 @@ -import time +""" +Benchmark result aggregation and compilation interfaces. 
+ +Provides protocols and implementations for collecting, processing, and compiling +benchmark data from scheduler executions into final metrics and statistics. + +Classes: + Aggregator: Protocol for processing benchmark data updates. + CompilableAggregator: Protocol for aggregators that can compile final results. + SchedulerStatsAggregator: Aggregates scheduler timing and performance metrics. + GenerativeRequestsStatsProgressAggregator: Tracks generation metrics during run. + GenerativeRequestsAggregator: Compiles complete generative benchmark results. + +Functions: + add_aggregate_metric: Helper for accumulating timing and count metrics. + +Type Variables: + RequestT: Generic request object type. + ResponseT: Generic response object type. + RequestTimingsT: Generic request timing object type. +""" + +from __future__ import annotations + +import math +import random from abc import ABC, abstractmethod -from pathlib import Path from typing import ( Any, + ClassVar, Generic, Literal, - Optional, - TypeVar, - Union, + Protocol, + runtime_checkable, ) -from pydantic import Field +from pydantic import Field, PrivateAttr -from guidellm.backend import ResponseSummary -from guidellm.benchmark.benchmark import ( - BenchmarkArgs, - BenchmarkRunStats, - BenchmarkT, - GenerativeBenchmark, - GenerativeTextErrorStats, - GenerativeTextResponseStats, +from guidellm.backends import ( + GenerationRequest, + GenerationResponse, ) -from guidellm.objects import ( - RunningStats, - StandardBaseModel, - StatusBreakdown, - TimeRunningStats, +from guidellm.benchmark.objects import ( + BenchmarkSchedulerStats, + GenerativeMetrics, + GenerativeRequestStats, ) -from guidellm.request import ( - GenerationRequest, - GenerativeRequestLoaderDescription, - RequestLoaderDescription, +from guidellm.scheduler import ( RequestT, ResponseT, -) -from guidellm.scheduler import ( - GenerativeRequestsWorkerDescription, - SchedulerRequestResult, - WorkerDescription, + ScheduledRequestInfo, + SchedulerState, ) from guidellm.settings import settings -from guidellm.utils import check_load_processor +from guidellm.utils import ( + InfoMixin, + PydanticClassRegistryMixin, + StatusBreakdown, + StatusDistributionSummary, + all_defined, + safe_divide, + safe_getattr, +) __all__ = [ - "AggregatorT", - "BenchmarkAggregator", - "GenerativeBenchmarkAggregator", + "Aggregator", + "AggregatorState", + "CompilableAggregator", + "GenerativeRequestsAggregator", + "GenerativeStatsProgressAggregator", + "InjectExtrasAggregator", + "SchedulerStatsAggregator", + "SerializableAggregator", ] -class SchedulerRunningStats(StandardBaseModel): +class AggregatorState(dict[str, Any]): + def add_metric( + self, + key: str, + value: int | float | None, + start_val: int | float | None = 0.0, + count: int | None = 1, + duration: float | None = None, + duration_div: Literal["total", "avg"] = "total", + prefix: str | None = None, + ): + """ + Add timing or count metrics to aggregation state. 
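+
+        The value delta (``value - start_val``) is accumulated into ``{key}_total``
+        and ``{key}_count``, and the running ``{key}_avg`` is refreshed; when
+        ``duration`` is provided, ``{key}_duration`` and ``{key}_rate`` are set.
+
+        :param key: Base name for the metric entries written into the state.
+        :param value: Measured value; the update is skipped when undefined.
+        :param start_val: Baseline subtracted from ``value`` before accumulating.
+        :param count: Number of observations represented by this update.
+        :param duration: Elapsed time used to derive ``{key}_rate``, if given.
+        :param duration_div: Whether the rate divides the running total or the
+            running average by ``duration``.
+        :param prefix: Optional prefix prepended to ``key`` for namespaced metrics.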
+ """ + if prefix: + self.add_metric( + key=f"{prefix}_{key}", + value=value, + start_val=start_val, + count=count, + duration=duration, + duration_div=duration_div, + ) + return + + if not all_defined(value, start_val, count): + return + + delta_val = value - start_val + self[f"{key}_total"] = self.get(f"{key}_total", 0) + delta_val + self[f"{key}_count"] = self.get(f"{key}_count", 0) + count + self[f"{key}_avg"] = safe_divide( + self.get(f"{key}_total"), self.get(f"{key}_count") + ) + + if all_defined(duration): + self[f"{key}_duration"] = duration + self[f"{key}_rate"] = safe_divide( + self.get(f"{key}_{duration_div}"), duration + ) + + def set_metric( + self, + key: str, + value: int | float | None, + type_: Literal["total", "count", "avg", "duration", "rate"], + prefix: str | None = None, + ): + if prefix: + self.set_metric( + key=f"{prefix}_{key}", + value=value, + type_=type_, + prefix=None, + ) + return + + self[f"{key}_{type_}"] = value + + def get_metric( + self, + key: str, + type_: Literal["total", "count", "avg", "duration", "rate"], + default: int | float | None = None, + prefix: str | None = None, + ) -> int | float | None: + if prefix: + return self.get_metric( + key=f"{prefix}_{key}", + type_=type_, + default=default, + ) + + return self.get(f"{key}_{type_}", default) + + +@runtime_checkable +class Aggregator(Protocol[ResponseT, RequestT]): """ - The metrics for the scheduler stored as running statistics for easy calculations - of rates, averages, totals, etc. + Protocol for processing benchmark data updates during execution. + + Defines the interface for aggregators that collect and process request/response + data from scheduler executions. Implementations update aggregation state with + each completed request for eventual compilation into final metrics. """ - created_requests: RunningStats = Field( - description=( - "The running statistics for the number of requests created for this " - "benchmark run. This includes all requests created, regardless of " - "their status." - ), - default_factory=RunningStats, - ) - queued_requests: RunningStats = Field( - description=( - "The running statistics for the number of requests pending in queue " - "for this benchmark run. This includes requests that are waiting to " - "be scheduled." - ), - default_factory=RunningStats, - ) - scheduled_requests: RunningStats = Field( - description=( - "The running statistics for the number of requests scheduled (actively " - "running but waiting for the desired start time) for this benchmark run." - ), - default_factory=RunningStats, - ) - processing_requests: RunningStats = Field( - description=( - "The running statistics for the number of requests actively being " - "processed by the worker for this benchmark run." - ), - default_factory=RunningStats, - ) - completed_requests: RunningStats = Field( - description=( - "The running statistics for the number of requests completed for this " - "benchmark run. This includes requests within the warmup and cooldown " - "period, if any, along with the final results." - ), - default_factory=RunningStats, - ) + def __call__( + self, + state: AggregatorState, + response: ResponseT | None, + request: RequestT, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: + """ + Process a completed request and update aggregation state. + + :param state: Current aggregation state to update in-place. + :param response: Response generated for the request, if successful. + :param request: The processed request object. 
+ :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Optional intermediate updates for progress reporting. + """ -class RequestsRunningStats(StandardBaseModel): +@runtime_checkable +class CompilableAggregator(Protocol[ResponseT, RequestT]): """ - The metrics for requests that have succeeded, been canceled, or errored stored - as running statistics for easy calculations of rates, averages, totals, etc. + Protocol for aggregators that compile final results from aggregated state. + + Extends the Aggregator protocol with the ability to transform accumulated + state into final benchmark results and metrics after execution completes. """ - totals: StatusBreakdown[RunningStats, RunningStats, RunningStats, RunningStats] = ( - Field( - description=( - "The running statistics for the total number of requests that " - "completed within the benchmark run." - ), - default_factory=lambda: StatusBreakdown( - successful=RunningStats(), - errored=RunningStats(), - incomplete=RunningStats(), - total=RunningStats(), - ), - ) - ) - queued_time: TimeRunningStats = Field( - description=( - "The running statistics for the time spent in queue for all requests that " - "completed within the benchmark run. This is the time from when the " - "request was created to when it was dequeued by the worker." - ), - default_factory=TimeRunningStats, - ) - scheduled_time_delay: TimeRunningStats = Field( - description=( - "The running statistics for the time spent from when a request was " - "dequeued by the worker to when it was actually scheduled by the worker" - "for all requests that completed within the benchmark run. " - "This should be as close to 0 as possible, any additional time is " - "overheads from the system or the worker." - ), - default_factory=TimeRunningStats, - ) - scheduled_time_sleep: TimeRunningStats = Field( - description=( - "The running statistics for the time for each request spent sleeping til " - "the desired start time was reached for all requests that completed within " - "the benchmark run. This is the time from when the request was scheduled " - "to when the desired start time was reached. " - ), - default_factory=TimeRunningStats, - ) - worker_start_delay: TimeRunningStats = Field( - description=( - "The running statistics for the time delay between when the request was " - "scheduled and when the worker actually started processing subtracting any " - "sleep time for all requests that completed within the benchmark run. " - "This should be as close to 0 as possible, any additional time is " - "overheads from the system or the worker." - ), - default_factory=TimeRunningStats, - ) - worker_time: TimeRunningStats = Field( - description=( - "The running statistics for the time spent processing all requests that " - "completed within the benchmark run. This is the time from when the " - "request was started to when it was completed." - ), - default_factory=TimeRunningStats, - ) - worker_start_time_targeted_delay: TimeRunningStats = Field( - description=( - "The running statistics for the delay between the targeted start time and " - "the actual start time for requests that completed within the benchmark " - "run. This represents delays from the best case desired start time. " - "For async strategies, this represents delays from the ideal system. " - "For sync strategies, since those are doubled in queue, this should be " - "as close to the time for a request to be processed as possible." 
- ), - default_factory=TimeRunningStats, - ) - request_start_time_delay: TimeRunningStats = Field( - description=( - "The running statistics for the delay between the actual request being " - "made and the time the worker started on the request for all requests " - "that completed within the benchmark run. This time should be as close to " - "0 as possible, any additional time is overhead from the system or " - "the worker." - ), - default_factory=TimeRunningStats, - ) - request_start_time_targeted_delay: TimeRunningStats = Field( - description=( - "The running statistics for the delay between the targeted start time and " - "the actual start time for all requests that completed within the " - "benchmark run. This represents delays from the best case desired start " - "time. For async strategies, this represents delays from the ideal system. " - "For sync strategies, since those are duplicated in queue, this should be " - "as close to the time for a request to be processed." - ), - default_factory=TimeRunningStats, - ) - request_time_delay: TimeRunningStats = Field( - description=( - "The running statistics for the delay in time between the total request " - "time and the worker time. This should be as close to 0 as possible, any " - "additional time is overhead from the system or the worker. " - ), - default_factory=TimeRunningStats, - ) - request_time: TimeRunningStats = Field( - description=( - "The running statistics for the time spent processing all requests that " - "completed within the benchmark run. This is the time from when the " - "request was created to when it was completed." - ), - default_factory=TimeRunningStats, - ) + def __call__( + self, + state: AggregatorState, + response: ResponseT | None, + request: RequestT, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: + """ + Process a completed request and update aggregation state. + :param state: Current aggregation state to update in-place. + :param response: Response generated for the request, if successful. + :param request: The processed request object. + :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Optional intermediate updates for progress reporting. + """ -class BenchmarkAggregator( - ABC, StandardBaseModel, Generic[BenchmarkT, RequestT, ResponseT] + def compile( + self, state: AggregatorState, scheduler_state: SchedulerState + ) -> dict[str, Any]: + """ + Compile aggregated state into final benchmark results. + + :param agg_state: The accumulated aggregation state. + :param scheduler_state: Final scheduler execution state. + :return: Compiled benchmark results and metrics. + """ + + +class SerializableAggregator( + PydanticClassRegistryMixin[type["SerializableAggregator"]], + ABC, + Generic[ResponseT, RequestT], ): + schema_discriminator: ClassVar[str] = "type_" + + @classmethod + def __pydantic_schema_base_type__(cls) -> type[SerializableAggregator]: + if cls.__name__ == "SerializableAggregator": + return cls + + return SerializableAggregator + + @classmethod + @abstractmethod + def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]: + """ + Validate and process arguments for constraint creation. + + Must be implemented by subclasses to handle their specific parameter patterns. 
+ + :param args: Positional arguments passed to the constraint + :param kwargs: Keyword arguments passed to the constraint + :return: Validated dictionary of parameters for constraint creation + :raises NotImplementedError: Must be implemented by subclasses + """ + ... + + @classmethod + def resolve( + cls, + aggregators: dict[ + str, + Any | dict[str, Any] | Aggregator | CompilableAggregator, + ], + ) -> dict[str, Aggregator | CompilableAggregator]: + """ + Resolve mixed aggregator specifications to callable aggregators. + + :param aggregators: Dictionary mapping aggregator keys to specifications + :return: Dictionary mapping aggregator keys to callable functions + :raises ValueError: If any key is not registered in the factory + """ + resolved = {} + + for key, val in aggregators.items(): + if isinstance(val, (Aggregator, CompilableAggregator)): + resolved[key] = val + else: + aggregator_class = cls.get_registered_object(key) + kwargs = aggregator_class.validated_kwargs(**val) + resolved[key] = aggregator_class(**kwargs) + + return resolved + + type_: Literal["aggregator"] = Field(default="aggregator", description="") + + @abstractmethod + def __call__( + self, + state: AggregatorState, + response: ResponseT | None, + request: RequestT, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: + """ + Process a completed request and update aggregation state. + + :param agg_state: Current aggregation state to update in-place. + :param response: Response generated for the request, if successful. + :param request: The processed request object. + :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Optional intermediate updates for progress reporting. + """ + + @abstractmethod + def compile( + self, state: AggregatorState, scheduler_state: SchedulerState + ) -> dict[str, Any]: + """ + Compile aggregated state into final benchmark results. + + :param agg_state: The accumulated aggregation state. + :param scheduler_state: Final scheduler execution state. + :return: Compiled benchmark results and metrics. + """ + + +@SerializableAggregator.register("inject_extras") +class InjectExtrasAggregator(SerializableAggregator[ResponseT, RequestT], InfoMixin): """ - A pydantic base class representing the base class for aggregating benchmark results. - The purpose is to receive and process results from a Benchmarker as it iterates - through a Scheduler for an individual benchmark run. - As results are added, lightweight statistics are updated and stored for immediate - progress and informational updates to the caller. - Once the benchmark run is complete, the `compile` method is called to finalize - the benchmark and return a Benchmark object with all the results and statistics - fully calculated. + Aggregator for injecting extra metadata into the output. """ - type_: Literal["benchmark_aggregator"] = "benchmark_aggregator" - run_id: str = Field( - description=( - "The unique identifier for the encompasing benchmark run that this " - "benchmark was a part of." - ) - ) - args: BenchmarkArgs = Field( - description=( - "The arguments used to create the benchmark run that this benchmark was " - "a part of." - ) - ) - worker_description: Union[ - GenerativeRequestsWorkerDescription, WorkerDescription - ] = Field( - description=( - "The description and specifics for the worker used to resolve requests " - "for this benchmark." 
- ), - discriminator="type_", - ) - request_loader_description: Union[ - GenerativeRequestLoaderDescription, RequestLoaderDescription - ] = Field( - description=( - "The description and specifics for the request loader used to create " - "requests for this benchmark." - ), - discriminator="type_", - ) - extras: dict[str, Any] = Field( - description=( - "Any additional information or metadata that was passed for this benchmark." - ) - ) - in_warmup: bool = Field( - description=( - "A flag to indicate if the benchmark is currently in the warmup phase." - ), - default=False, - exclude=True, - ) - in_cooldown: bool = Field( - description=( - "A flag to indicate if the benchmark is currently in the cooldown phase." - ), - default=False, - exclude=True, - ) - scheduler_stats: SchedulerRunningStats = Field( - description=( - "The running statistics for the scheduler for this benchmark run. " - "This includes all requests created, regardless of their status." - ), - default_factory=SchedulerRunningStats, - ) - requests_stats: RequestsRunningStats = Field( - description=( - "The running statistics for the requests for this benchmark run. " - "This includes all requests created, regardless of their status." - ), - default_factory=RequestsRunningStats, - ) - results: StatusBreakdown[ - list[SchedulerRequestResult[RequestT, ResponseT]], - list[SchedulerRequestResult[RequestT, ResponseT]], - list[SchedulerRequestResult[RequestT, ResponseT]], - None, - ] = Field( - description=( - "The completed requests for this benchmark run broken down by status" - "and excluding warmup and cooldown requests." - ), - default_factory=lambda: StatusBreakdown( # type: ignore[arg-type] - successful=[], - errored=[], - incomplete=[], - total=None, - ), - ) + @classmethod + def validated_kwargs(cls, extras: dict[str, Any], **_kwargs) -> dict[str, Any]: + return {"extras": extras} - def add_result( + type_: Literal["inject_extras"] = Field(default="inject_extras") + extras: dict[str, Any] | None = Field(default_factory=None) + + def __call__( self, - result: SchedulerRequestResult[RequestT, ResponseT], - ) -> bool: + state: AggregatorState, + response: ResponseT | None, + request: RequestT, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: """ - Add a result to the aggregator. This will update the internal statistics - and add the result to the list of results if it is not within the warmup or - cooldown period. - - :param result: The result to add to the aggregator. - :return: True if the result was added, False if it was added because it - did not fit within the warmup or cooldown period, was not requested, - or is not finished + Inject extra metadata into the aggregation state. + + :param agg_state: Current aggregation state to update. + :param response: Response generated for the request, if successful. + :param request: The processed request object. + :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Updated aggregation state with injected extras. 
""" - # Add scheduler statistics - self.scheduler_stats.created_requests += max( - 0, result.run_info.created_requests - ) - self.scheduler_stats.queued_requests += max(0, result.run_info.queued_requests) - self.scheduler_stats.scheduled_requests += max( - 0, result.run_info.scheduled_requests - ) - self.scheduler_stats.processing_requests += max( - 0, result.run_info.processing_requests - ) - self.scheduler_stats.completed_requests += max( - 0, result.run_info.completed_requests - ) + _ = (state, response, request, request_info, scheduler_state) # unused + return None - if result.type_ != "request_complete" or ( - result.request_info.canceled and not result.request_info.requested - ): - # If the result is not completed yet, don't add to the results - # If the result was canceled and not started, ignore it - return False + def compile( + self, state: AggregatorState, scheduler_state: SchedulerState + ) -> dict[str, Any]: + _ = (state, scheduler_state) # unused + return {"extras": self.extras} if self.extras else {} - # Add request statistics - self.requests_stats.totals.total += 1 - if result.request_info.canceled: - self.requests_stats.totals.incomplete += 1 - elif result.request_info.errored: - self.requests_stats.totals.errored += 1 - elif result.request_info.completed: - self.requests_stats.totals.successful += 1 - else: - raise ValueError( - "Unexpected state: request_info must be either " - "completed, canceled, or errored. " - f"Got {result.request_info}" - ) - self.requests_stats.queued_time.update( - result.request_info.dequeued_time - result.request_info.queued_time - ) - self.requests_stats.scheduled_time_delay.update( - result.request_info.scheduled_time - result.request_info.dequeued_time +@SerializableAggregator.register("scheduler_stats") +class SchedulerStatsAggregator(SerializableAggregator[ResponseT, RequestT], InfoMixin): + """ + Aggregates scheduler timing and performance metrics. + + Collects timing data for various scheduler phases including queuing, + resolution, and processing delays to generate performance statistics. + """ + + @classmethod + def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]: + return {} + + type_: Literal["scheduler_stats"] = Field(default="scheduler_stats") + + def __call__( + self, + state: AggregatorState, + response: ResponseT | None, + request: RequestT, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: + """ + Aggregate scheduler timing metrics for a completed request. + + :param agg_state: Current aggregation state to update. + :param response: Response generated for the request, if successful. + :param request: The processed request object. + :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Updated aggregation state for intermediate reporting. 
+ """ + _ = (response, request, scheduler_state) # unused + if request_info.status not in ("completed", "errored", "cancelled"): + # Only compile scheduler stats for processed requests + return None + + state["updated_scheduler_stats"] = True + state.add_metric( + key="queued_time", + value=request_info.scheduler_timings.dequeued, + start_val=request_info.scheduler_timings.queued, ) - sleep_time = max( - 0.0, - result.request_info.targeted_start_time - - result.request_info.scheduled_time, + state.add_metric( + key="worker_resolve_start_delay", + value=request_info.scheduler_timings.resolve_start, + start_val=request_info.scheduler_timings.scheduled_at, ) - self.requests_stats.scheduled_time_sleep.update(sleep_time) - time_to_worker_start = ( - result.request_info.worker_start - result.request_info.scheduled_time + state.add_metric( + key="worker_resolve_time", + value=request_info.scheduler_timings.resolve_end, + start_val=request_info.scheduler_timings.resolve_start, ) - self.requests_stats.worker_start_delay.update(time_to_worker_start - sleep_time) - self.requests_stats.worker_time.update( - result.request_info.worker_end - result.request_info.worker_start + state.add_metric( + key="worker_resolve_end_delay", + value=request_info.scheduler_timings.resolve_end, + start_val=safe_getattr(request_info.request_timings, "request_end"), ) - self.requests_stats.worker_start_time_targeted_delay.update( - result.request_info.worker_start - result.request_info.targeted_start_time + state.add_metric( + key="finalized_delay", + value=request_info.scheduler_timings.finalized, + start_val=request_info.scheduler_timings.resolve_end, ) - self.requests_stats.request_start_time_delay.update( - result.request_info.worker_start - result.request_info.targeted_start_time + state.add_metric( + key="worker_targeted_start_delay", + value=request_info.scheduler_timings.resolve_start, + start_val=request_info.scheduler_timings.targeted_start, ) - self.requests_stats.request_start_time_targeted_delay.update( - result.request_info.worker_start - result.request_info.targeted_start_time + state.add_metric( + key="request_start_delay", + value=request_info.scheduler_timings.resolve_start, + start_val=safe_getattr(request_info.request_timings, "request_start"), ) - self.requests_stats.request_time_delay.update( - (result.request_info.worker_end - result.request_info.worker_start) - - (result.request_info.worker_end - result.request_info.worker_start) + state.add_metric( + key="request_time", + value=safe_getattr(request_info.request_timings, "request_end"), + start_val=safe_getattr(request_info.request_timings, "request_start"), ) - self.requests_stats.request_time.update( - result.request_info.worker_end - result.request_info.worker_start + state.add_metric( + key="request_targeted_start_delay", + value=safe_getattr(request_info.request_timings, "request_start"), + start_val=request_info.scheduler_timings.targeted_start, ) - # Add result to the list of results provided we are not in warmup or cooldown - total_completed = self.requests_stats.totals.total.total - global_start_time = self.requests_stats.totals.total.start_time + return state - in_warmup_number = ( - self.args.warmup_number and total_completed <= self.args.warmup_number - ) - in_warmup_duration = ( - self.args.warmup_duration - and result.request_info.worker_start - <= (global_start_time + self.args.warmup_duration) - ) + def compile( + self, state: AggregatorState, scheduler_state: SchedulerState + ) -> dict[Literal["scheduler_stats"], 
BenchmarkSchedulerStats]: + """ + Compile scheduler timing metrics into benchmark statistics. + + :param agg_state: Accumulated timing data and counts. + :param scheduler_state: Final scheduler execution state. + :return: Dictionary containing compiled scheduler statistics. + """ + return { + "run_stats": BenchmarkSchedulerStats( + start_time=scheduler_state.start_time, + end_time=scheduler_state.end_time, + requests_made=StatusBreakdown[int, int, int, int]( + successful=scheduler_state.successful_requests, + incomplete=scheduler_state.cancelled_requests, + errored=scheduler_state.errored_requests, + total=( + scheduler_state.successful_requests + + scheduler_state.cancelled_requests + + scheduler_state.errored_requests + ), + ), + queued_time_avg=state.get_metric( + key="queued_time", type_="avg", default=0.0 + ), + worker_resolve_start_delay_avg=state.get_metric( + key="worker_resolve_start_delay", type_="avg", default=0.0 + ), + worker_resolve_time_avg=state.get_metric( + key="worker_resolve_time", type_="avg", default=0.0 + ), + worker_resolve_end_delay_avg=state.get_metric( + key="worker_resolve_end_delay", type_="avg" + ), + finalized_delay_avg=state.get_metric( + key="finalized_delay", type_="avg", default=0.0 + ), + worker_targeted_start_delay_avg=state.get_metric( + key="worker_targeted_start_delay", type_="avg", default=0.0 + ), + request_start_delay_avg=state.get_metric( + key="request_start_delay", type_="avg", default=0.0 + ), + request_time_avg=state.get_metric( + key="request_time", type_="avg", default=0.0 + ), + request_targeted_start_delay_avg=state.get_metric( + key="request_targeted_start_delay", type_="avg", default=0.0 + ), + ), + } - if in_warmup_number or in_warmup_duration: - self.in_warmup = True - return True - self.in_warmup = False - in_cooldown_number = ( - self.args.cooldown_number - and self.args.max_number - and total_completed > self.args.max_number - self.args.cooldown_number - ) - in_cooldown_duration = ( - self.args.cooldown_duration - and self.args.max_duration - and result.request_info.worker_start - > global_start_time + self.args.max_duration - self.args.cooldown_duration +@SerializableAggregator.register("generative_stats_progress") +class GenerativeStatsProgressAggregator( + SerializableAggregator[GenerationResponse, GenerationRequest] +): + """ + Tracks generative model metrics during benchmark execution. + + Aggregates token-level metrics including time to first token, inter-token + latency, and token counts for real-time progress monitoring. + """ + + @classmethod + def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]: + return {} + + type_: Literal["generative_stats_progress"] = Field( + default="generative_stats_progress" + ) + + def __call__( + self, + state: AggregatorState, + response: GenerationResponse | None, + request: GenerationRequest, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: + """ + Aggregate generative model metrics for a completed request. + + :param agg_state: Current aggregation state to update. + :param response: Generation response with token and timing data. + :param request: The processed generation request. + :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: Updated aggregation state for progress reporting. 
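+
+        Tracks request rate and concurrency, request latency, time to first token,
+        inter-token latency, time per output token, and prompt/output/total token
+        counts, both overall and broken down by request status.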
+ """ + _ = (request,) # unused + if request_info.status not in {"completed", "errored", "cancelled"}: + # Only compile progress stats for processed requests + return None + + state["updated_generative_stats"] = True + start_time = scheduler_state.start_time + end_time = ( + safe_getattr(request_info.request_timings, "request_end") + or request_info.scheduler_timings.resolve_end ) + duration = end_time - start_time if end_time else None - if in_cooldown_number or in_cooldown_duration: - self.in_cooldown = True - return True + for prefix in (request_info.status, None): + requests_count = ( + scheduler_state.processed_requests + if prefix is None + else scheduler_state.successful_requests + if request_info.status == "completed" + else scheduler_state.cancelled_requests + if request_info.status == "cancelled" + else scheduler_state.errored_requests + ) - self.in_cooldown = False + # Requests per Second + if duration is not None: + state.set_metric( + key="requests", + value=safe_divide(requests_count, duration), + type_="rate", + prefix=prefix, + ) - if result.request_info.canceled: - self.results.incomplete.append(result) - elif result.request_info.errored: - self.results.errored.append(result) - elif result.request_info.completed: - self.results.successful.append(result) - else: - raise ValueError( - "Unexpected state: request_info must be either " - "completed, canceled, or errored. " - f"Got {result.request_info}" + # Request Concurrency + state.set_metric( + key="requests", + value=scheduler_state.processing_requests, + type_="avg", + prefix=prefix, ) - return True + # Request Latency + state.add_metric( + key="request_latency", + value=safe_getattr(request_info.request_timings, "request_end"), + start_val=safe_getattr(request_info.request_timings, "request_start"), + prefix=prefix, + ) - @abstractmethod - def compile(self) -> BenchmarkT: - """ - Compile the benchmark results and statistics into a Benchmark object. - This is required to be implemented by subclasses to finalize the benchmark - and return the compiled object. 
+ # Time to First Token + state.add_metric( + key="time_to_first_token", + value=safe_getattr(request_info.request_timings, "first_iteration"), + start_val=safe_getattr(request_info.request_timings, "request_start"), + prefix=prefix, + ) + + output_tokens = safe_getattr(response, "output_tokens") + prompt_tokens = safe_getattr(response, "prompt_tokens") + + # Inter Token Latency + state.add_metric( + key="inter_token_latency", + value=safe_getattr(request_info.request_timings, "last_iteration"), + start_val=safe_getattr(request_info.request_timings, "first_iteration"), + count=( + output_tokens - 1 if output_tokens and output_tokens > 1 else None + ), + prefix=prefix, + ) + + # Time per Output Token + state.add_metric( + key="time_per_output_token", + value=safe_getattr(request_info.request_timings, "request_start"), + start_val=safe_getattr(request_info.request_timings, "last_iteration"), + count=output_tokens, + prefix=prefix, + ) + + # Prompt Tokens + state.add_metric( + key="prompt_tokens", + value=prompt_tokens, + duration=duration, + prefix=prefix, + ) + + # Output Tokens + state.add_metric( + key="output_tokens", + value=output_tokens, + duration=duration, + prefix=prefix, + ) + + # Total Tokens + state.add_metric( + key="total_tokens", + value=( + prompt_tokens + output_tokens + if all_defined(prompt_tokens, output_tokens) + else prompt_tokens + if all_defined(prompt_tokens) + else output_tokens + if all_defined(output_tokens) + else None + ), + duration=duration, + prefix=prefix, + ) + + return state + + def compile( + self, state: AggregatorState, scheduler_state: SchedulerState + ) -> dict[str, Any]: """ - ... + Compile progress metrics into final results. + GenerativeStatsProgressAggregator is primarily for progress tracking, + so compilation returns the aggregated state as-is. -AggregatorT = TypeVar("AggregatorT", bound=BenchmarkAggregator) + :param agg_state: The accumulated aggregation state. + :param scheduler_state: Final scheduler execution state. + :return: The aggregated state as final results. + """ + _ = (state, scheduler_state) # unused + return {} -class GenerativeRequestsRunningStats(RequestsRunningStats): +@SerializableAggregator.register("generative_requests") +class GenerativeRequestsAggregator( + SerializableAggregator[GenerationResponse, GenerationRequest], +): """ - The metrics for generative requests that have succeeded, been canceled, or errored - stored as running statistics for easy calculations of rates, averages, totals, etc. + Compiles complete generative benchmark results with warmup/cooldown filtering. + + Aggregates request data during execution and compiles comprehensive metrics + including timing distributions, token statistics, and throughput measurements. + Supports filtering warmup and cooldown periods from final results. """ - time_to_first_token: TimeRunningStats = Field( - description=( - "The running statistics for the time from the start of the request to the " - "first token being generated for all requests that completed within the " - "benchmark run." - ), - default_factory=TimeRunningStats, - ) - inter_token_latency: TimeRunningStats = Field( - description=( - "The running statistics for the time between each token being generated " - "for all requests that completed within the benchmark run." - ), - default_factory=TimeRunningStats, - ) - prompt_tokens: RunningStats = Field( - description=( - "The running statistics for the token count for the prompt for all " - "requests that completed, if available in the response." 
- ), - default_factory=RunningStats, - ) - output_tokens: RunningStats = Field( - description=( - "The running statistics for the token count for the output for all " - "requests that completed, if available in the response." - ), - default_factory=RunningStats, - ) - total_tokens: RunningStats = Field( - description=( - "The running statistics for the total token count for all requests that " - "completed, if available in the response." - ), - default_factory=RunningStats, - ) + @classmethod + def validated_kwargs( + cls, + request_samples: int | None = 20, + warmup: int | float | None = None, + cooldown: int | float | None = None, + **_kwargs, + ) -> dict[str, Any]: + return { + "request_samples": request_samples, + "warmup": warmup, + "cooldown": cooldown, + } + type_: Literal["generative_requests"] = Field(default="generative_requests") -class GenerativeBenchmarkAggregator( - BenchmarkAggregator[GenerativeBenchmark, GenerationRequest, ResponseSummary] -): - type_: Literal["generative_benchmark_aggregator"] = ( - "generative_benchmark_aggregator" # type: ignore[assignment] - ) - processor: Optional[Union[str, Path, Any]] = Field( - description=( - "The tokenizer to use for calculating token counts when none are " - "avaiable that match the preferred source." - ) + request_samples: int | None = Field(default=20, description="") + warmup: int | float | None = Field( + default=None, + description="Number of warmup requests to ignore at benchmark start", ) - processor_args: Optional[dict[str, Any]] = Field( - description=( - "Additional arguments to pass to the tokenizer if it requires " - "any specific configuration for loading or processing." - ), - ) - worker_description: GenerativeRequestsWorkerDescription = Field( - description=( - "The description and specifics for the worker used to resolve requests " - "for this benchmark." - ), - discriminator="type_", - ) - request_loader_description: GenerativeRequestLoaderDescription = Field( - description=( - "The description and specifics for the request loader used to create " - "requests for this benchmark." - ), - discriminator="type_", - ) - requests_stats: GenerativeRequestsRunningStats = Field( - description=( - "The running statistics for the requests for this benchmark run. " - "This includes all requests created, regardless of their status." - ), - default_factory=GenerativeRequestsRunningStats, + cooldown: int | float | None = Field( + default=None, + description="Number of cooldown requests to ignore at benchmark end", ) + _in_cooldown: bool = PrivateAttr(False) + _in_warmup: bool = PrivateAttr(False) - def add_result( - self, result: SchedulerRequestResult[GenerationRequest, ResponseSummary] - ) -> bool: + def __call__( + self, + state: AggregatorState, + response: GenerationResponse | None, + request: GenerationRequest, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> dict[str, Any] | None: """ - Add a result to the aggregator. This will update the internal statistics - and add the result to the list of results if it is not within the warmup or - cooldown period. + Collect completed requests for final compilation. - :param result: The result to add to the aggregator. + Filters requests based on warmup/cooldown settings and categorizes by + completion status for comprehensive benchmark analysis. + + :param agg_state: Current aggregation state to update. + :param response: Generation response data. + :param request: The processed generation request. 
+ :param request_info: Scheduling metadata and timing information. + :param scheduler_state: Current scheduler execution state. + :return: None, as this aggregator only collects for final compilation. """ - if not super().add_result(result): - return False + # Skip invalid requests + if request_info.status not in {"completed", "canceled", "errored"} or ( + request_info.status == "canceled" + and safe_getattr(request_info.scheduler_timings, "resolve_start") is None + # Canceled requests that never started should not be kept + ): + return None - if result.request is None: - raise ValueError("Request is None, cannot add result.") + status = { + "updated_generative_requests": True, + "requests_in_warmup": False, + "requests_in_cooldown": False, + } - if result.response is None: - raise ValueError("Response is None, cannot add result.") + if self._is_in_warmup(request_info, scheduler_state): + status["requests_in_warmup"] = True + return status - self.requests_stats.request_start_time_delay.update( - result.response.start_time - result.request_info.worker_start - ) - self.requests_stats.request_start_time_targeted_delay.update( - result.response.start_time - result.request_info.targeted_start_time - ) - self.requests_stats.request_time_delay.update( - (result.response.start_time - result.request_info.worker_start) - + result.request_info.worker_end - - result.response.end_time - ) - self.requests_stats.request_time.update( - result.response.end_time - result.response.start_time - ) - if result.response.first_iter_time: - self.requests_stats.time_to_first_token.update( - result.response.first_iter_time - result.response.start_time - ) - if result.response.last_iter_time and result.response.first_iter_time: - self.requests_stats.inter_token_latency.update( - result.response.last_iter_time - result.response.first_iter_time, - count=(result.response.output_tokens or 1) - 1, - ) - self.requests_stats.prompt_tokens += result.response.request_prompt_tokens or 0 - self.requests_stats.output_tokens += result.response.request_output_tokens or 0 - total_tokens = (result.response.request_prompt_tokens or 0) + ( - result.response.request_output_tokens or 0 - ) - self.requests_stats.total_tokens += total_tokens + if self._is_in_cooldown(request_info, scheduler_state): + status["requests_in_cooldown"] = True + return status - return True + if "completed" not in state: + state["completed"] = [] + state["errored"] = [] + state["incomplete"] = [] - def compile(self) -> GenerativeBenchmark: + # Categorize request by status + if request_info.status == "completed": + state["completed"].append((response, request, request_info)) + elif request_info.status == "canceled": + state["incomplete"].append((response, request, request_info)) + else: + state["errored"].append((response, request, request_info)) + + return status + + def compile( + self, + state: AggregatorState, + scheduler_state: SchedulerState, # noqa: ARG002 + ) -> dict[str, Any]: """ - Compile the benchmark results and statistics into a GenerativeBenchmark object. - This is required to be implemented by subclasses to finalize the benchmark - and return the compiled object. + Compile aggregated requests into comprehensive benchmark results. + + Transforms collected request data into detailed metrics including timing + distributions, token statistics, throughput measurements, and status breakdowns. + + :param agg_state: Accumulated request data categorized by completion status. + :param scheduler_state: Final scheduler execution state. 
+ :return: Complete benchmark results with metrics and request statistics. """ - successful, incomplete, errored = self._compile_results() - - return GenerativeBenchmark.from_stats( - run_id=self.run_id, - successful=successful, - incomplete=incomplete, - errored=errored, - args=self.args, - run_stats=BenchmarkRunStats( - start_time=self.requests_stats.totals.total.start_time, - end_time=time.time(), - requests_made=StatusBreakdown( - successful=int(self.requests_stats.totals.successful.total), - errored=int(self.requests_stats.totals.errored.total), - incomplete=int(self.requests_stats.totals.incomplete.total), - total=int(self.requests_stats.totals.total.total), - ), - queued_time_avg=self.requests_stats.queued_time.mean, - scheduled_time_delay_avg=self.requests_stats.scheduled_time_delay.mean, - scheduled_time_sleep_avg=self.requests_stats.scheduled_time_sleep.mean, - worker_start_delay_avg=self.requests_stats.worker_start_delay.mean, - worker_time_avg=self.requests_stats.worker_time.mean, - worker_start_time_targeted_delay_avg=self.requests_stats.worker_start_time_targeted_delay.mean, - request_start_time_delay_avg=self.requests_stats.request_start_time_delay.mean, - request_start_time_targeted_delay_avg=self.requests_stats.request_start_time_targeted_delay.mean, - request_time_delay_avg=self.requests_stats.request_time_delay.mean, - request_time_avg=self.requests_stats.request_time.mean, - ), - worker=self.worker_description, - requests_loader=self.request_loader_description, - extras=self.extras, + successful: list[GenerativeRequestStats] = [ + self._create_generative_request_stats(response, request, request_info) + for (response, request, request_info) in state.get("completed", []) + ] + incomplete: list[GenerativeRequestStats] = [ + self._create_generative_request_stats(response, request, request_info) + for (response, request, request_info) in state.get("incomplete", []) + ] + errored: list[GenerativeRequestStats] = [ + self._create_generative_request_stats(response, request, request_info) + for (response, request, request_info) in state.get("errored", []) + ] + + # Use all requests for metrics calculations (not sampled) + total: list[GenerativeRequestStats] = successful + incomplete + errored + total_types: list[Literal["successful", "incomplete", "error"]] = [ + *["successful"] * len(successful), + *["incomplete"] * len(incomplete), + *["error"] * len(errored), + ] + start_time = min( + [math.inf] + + [ + req.scheduler_info.request_timings.request_start + for req in total + if req.scheduler_info.request_timings.request_start is not None + ] + ) + end_time = max( + [-1 * math.inf] + + [ + req.scheduler_info.request_timings.request_end + for req in total + if req.scheduler_info.request_timings.request_end is not None + ] ) - def _compile_results( - self, - ) -> tuple[ - list[GenerativeTextResponseStats], - list[GenerativeTextErrorStats], - list[GenerativeTextErrorStats], - ]: - successful: list[GenerativeTextResponseStats] = [ - GenerativeTextResponseStats( - request_id=result.request.request_id, - request_type=result.request.request_type, - scheduler_info=result.request_info, - prompt=str(result.request.content), - prompt_tokens=self._compile_tokens_count( - value=str(result.request.content), - requests_tokens=result.response.request_prompt_tokens, - response_tokens=result.response.response_prompt_tokens, - preferred_tokens_source=settings.preferred_prompt_tokens_source, - errored=False, + return { + "start_time": start_time, + "end_time": end_time, + "request_totals": 
StatusBreakdown[int, int, int, int]( + successful=len(successful), + incomplete=len(incomplete), + errored=len(errored), + total=len(total), + ), + "requests": StatusBreakdown[ + list[GenerativeRequestStats], + list[GenerativeRequestStats], + list[GenerativeRequestStats], + list[GenerativeRequestStats], + ]( + successful=self._sample_request_stats(successful, self.request_samples), + incomplete=self._sample_request_stats(incomplete, self.request_samples), + errored=self._sample_request_stats(errored, self.request_samples), + ), + "metrics": GenerativeMetrics( + requests_per_second=self._calculate_requests_per_second( + statuses=total_types, requests=total ), - output=result.response.value, - output_tokens=self._compile_tokens_count( - value=result.response.value, - requests_tokens=result.response.request_output_tokens, - response_tokens=result.response.response_output_tokens, - preferred_tokens_source=settings.preferred_output_tokens_source, - errored=False, + request_concurrency=self._calculate_request_concurrency( + statuses=total_types, requests=total ), - start_time=result.response.start_time, - end_time=result.response.end_time, - first_token_time=result.response.first_iter_time or -1.0, - last_token_time=result.response.last_iter_time or -1.0, - ) - for result in self.results.successful - if result.request and result.response - ] - incomplete: list[GenerativeTextErrorStats] = [ - GenerativeTextErrorStats( - error=result.response.error or "", - request_id=result.request.request_id, - request_type=result.request.request_type, - scheduler_info=result.request_info, - prompt=str(result.request.content), - prompt_tokens=self._compile_tokens_count( - value=str(result.request.content), - requests_tokens=result.response.request_prompt_tokens, - response_tokens=result.response.response_prompt_tokens, - preferred_tokens_source=settings.preferred_prompt_tokens_source, - errored=True, + request_latency=self._calculate_request_latency( + statuses=total_types, requests=total ), - output=result.response.value, - output_tokens=self._compile_tokens_count( - value=result.response.value, - requests_tokens=result.response.request_output_tokens, - response_tokens=result.response.response_output_tokens, - preferred_tokens_source=settings.preferred_output_tokens_source, - errored=True, + prompt_token_count=self._calculate_prompt_token_count( + statuses=total_types, requests=total ), - start_time=result.response.start_time, - end_time=result.response.end_time, - first_token_time=result.response.first_iter_time, - last_token_time=result.response.last_iter_time, - ) - for result in self.results.incomplete - if result.request and result.response - ] - error: list[GenerativeTextErrorStats] = [ - GenerativeTextErrorStats( - error=result.response.error or "", - request_id=result.request.request_id, - request_type=result.request.request_type, - scheduler_info=result.request_info, - prompt=str(result.request.content), - prompt_tokens=self._compile_tokens_count( - value=str(result.request.content), - requests_tokens=result.response.request_prompt_tokens, - response_tokens=result.response.response_prompt_tokens, - preferred_tokens_source=settings.preferred_prompt_tokens_source, - errored=True, + output_token_count=self._calculate_output_token_count( + statuses=total_types, requests=total + ), + total_token_count=self._calculate_total_token_count( + statuses=total_types, requests=total + ), + time_to_first_token_ms=self._calculate_time_to_first_token_ms( + statuses=total_types, requests=total + ), + 
time_per_output_token_ms=self._calculate_time_per_output_token_ms( + statuses=total_types, requests=total + ), + inter_token_latency_ms=self._calculate_inter_token_latency_ms( + statuses=total_types, requests=total ), - output=result.response.value, - output_tokens=self._compile_tokens_count( - value=result.response.value, - requests_tokens=result.response.request_output_tokens, - response_tokens=result.response.response_output_tokens, - preferred_tokens_source=settings.preferred_output_tokens_source, - errored=True, + output_tokens_per_second=self._calculate_output_tokens_per_second( + statuses=total_types, requests=total ), - start_time=result.response.start_time, - end_time=result.response.end_time, - first_token_time=result.response.first_iter_time, - last_token_time=result.response.last_iter_time, + tokens_per_second=self._calculate_tokens_per_second( + statuses=total_types, requests=total + ), + ), + } + + def _is_in_warmup( + self, + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> bool: + """Check if the current request is within the warmup period.""" + if self.warmup is None: + return False + + if 0 < self.warmup < 1: # Percentage-based warmup + return ( + scheduler_state.remaining_fraction is not None + and scheduler_state.remaining_fraction > (1 - self.warmup) + ) + + if self.warmup >= 1: # Count/time-based warmup + if scheduler_state.processed_requests < self.warmup: + return True + + current_time = request_info.scheduler_timings.targeted_start + return ( + current_time is not None + and (current_time - scheduler_state.start_time) < self.warmup ) - for result in self.results.errored - if result.request and result.response - ] - return successful, incomplete, error + return False - def _compile_tokens_count( + def _is_in_cooldown( self, - value: str, - requests_tokens: Optional[int], - response_tokens: Optional[int], - preferred_tokens_source: Optional[Literal["request", "response", "local"]], - errored: bool, - ) -> int: - if not errored and preferred_tokens_source == "response" and response_tokens: - return response_tokens or 0 - - if not errored and preferred_tokens_source == "request" and requests_tokens: - return requests_tokens or 0 - - if preferred_tokens_source in {"response", "request"} and ( - self.processor is None or errored or response_tokens or requests_tokens - ): - # we had a preferred tokens source that isn't local and we either - # have the data to return something or we don't have the ability - # to calculate locally - return response_tokens or requests_tokens or 0 - - self.processor = check_load_processor( - self.processor, - processor_args=self.processor_args, - error_msg="Processor/Tokenizer is required for calculating token counts.", + request_info: ScheduledRequestInfo, + scheduler_state: SchedulerState, + ) -> bool: + """Check if the current request is within the cooldown period.""" + if self.cooldown is None: + return False + + if 0 < self.cooldown < 1: # Percentage-based cooldown + return ( + scheduler_state.remaining_fraction is not None + and scheduler_state.remaining_fraction < self.cooldown + ) + + if self.cooldown >= 1: # Count/time-based cooldown + if scheduler_state.remaining_requests <= self.cooldown: + return True + + current_time = ( + request_info.scheduler_timings.resolve_end + or request_info.scheduler_timings.targeted_start + ) + return ( + current_time is not None + and scheduler_state.remaining_duration is not None + and scheduler_state.remaining_duration < self.cooldown + ) + + return False + + @classmethod 
+ def _create_generative_request_stats( + cls, + response: GenerationResponse, + request: GenerationRequest, + request_info: ScheduledRequestInfo, + ) -> GenerativeRequestStats: + prompt_tokens = response.preferred_prompt_tokens( + settings.preferred_prompt_tokens_source + ) + output_tokens = response.preferred_output_tokens( + settings.preferred_output_tokens_source + ) + + return GenerativeRequestStats( + request_id=request.request_id, + request_type=request.request_type, + prompt=str(request.content), + request_args=response.request_args, + output=response.value, + iterations=response.iterations, + prompt_tokens=prompt_tokens, + output_tokens=output_tokens, + total_tokens=( + prompt_tokens + output_tokens + if prompt_tokens is not None and output_tokens is not None + else None + ), + scheduler_info=request_info, + ) + + @classmethod + def _sample_request_stats( + cls, stats: list[GenerativeRequestStats], sample_size: int | None + ) -> list[GenerativeRequestStats]: + if sample_size is None or sample_size <= 0 or not stats: + return stats + + return random.sample(stats, min(sample_size, len(stats))) + + @classmethod + def _calculate_requests_per_second( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_times = [] + + for status, request in zip(statuses, requests): + if not all_defined( + safe_getattr(request.scheduler_info.request_timings, "request_start"), + safe_getattr(request.scheduler_info.request_timings, "request_end"), + ): + continue + + filtered_statuses.append(status) + filtered_times.append( + ( + request.scheduler_info.request_timings.request_start, + request.scheduler_info.request_timings.request_end, + ) + ) + + return StatusDistributionSummary.from_request_times( + request_types=filtered_statuses, + requests=filtered_times, + distribution_type="rate", + ) + + @classmethod + def _calculate_request_concurrency( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_times = [] + + for status, request in zip(statuses, requests): + if not all_defined( + safe_getattr(request.scheduler_info.request_timings, "request_start"), + safe_getattr(request.scheduler_info.request_timings, "request_end"), + ): + continue + + filtered_statuses.append(status) + filtered_times.append( + ( + request.scheduler_info.request_timings.request_start, + request.scheduler_info.request_timings.request_end, + ) + ) + + return StatusDistributionSummary.from_request_times( + request_types=filtered_statuses, + requests=filtered_times, + distribution_type="concurrency", + ) + + @classmethod + def _calculate_request_latency( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.request_latency): + continue + + filtered_statuses.append(status) + filtered_values.append(request.request_latency) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + ) + + @classmethod + def _calculate_prompt_token_count( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + 
filtered_values = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.prompt_tokens): + continue + + filtered_statuses.append(status) + filtered_values.append(request.prompt_tokens) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + ) + + @classmethod + def _calculate_output_token_count( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.output_tokens): + continue + + filtered_statuses.append(status) + filtered_values.append(request.output_tokens) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + ) + + @classmethod + def _calculate_total_token_count( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.total_tokens): + continue + + filtered_statuses.append(status) + filtered_values.append(request.total_tokens) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + ) + + @classmethod + def _calculate_time_to_first_token_ms( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.time_to_first_token_ms): + continue + + filtered_statuses.append(status) + filtered_values.append(request.time_to_first_token_ms) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + ) + + @classmethod + def _calculate_time_per_output_token_ms( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + filtered_weights = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.time_to_first_token_ms): + continue + + # Add time to first token separately to better reflect in distribution + filtered_statuses.append(status) + filtered_values.append(request.time_to_first_token_ms) + filtered_weights.append(1) + + if not all_defined(request.inter_token_latency_ms): + continue + + # Add tokens after the first token to get the full distribution + filtered_statuses.append(status) + filtered_values.append(request.inter_token_latency_ms) + filtered_weights.append(request.output_tokens - 1) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + weights=filtered_weights, + ) + + @classmethod + def _calculate_inter_token_latency_ms( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_values = [] + filtered_weights = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.inter_token_latency_ms): + continue + + filtered_statuses.append(status) + filtered_values.append(request.inter_token_latency_ms) + 
filtered_weights.append(request.output_tokens - 1) + + return StatusDistributionSummary.from_values( + value_types=filtered_statuses, + values=filtered_values, + weights=filtered_weights, + ) + + @classmethod + def _calculate_output_tokens_per_second( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_request_times = [] + filtered_first_iter_times = [] + filtered_iter_counts = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.output_tokens_per_second): + continue + + filtered_statuses.append(status) + filtered_request_times.append( + ( + request.scheduler_info.request_timings.request_start, + request.scheduler_info.request_timings.request_end, + ) + ) + filtered_first_iter_times.append( + request.scheduler_info.request_timings.first_iteration + ) + filtered_iter_counts.append(request.output_tokens) + + return StatusDistributionSummary.from_iterable_request_times( + request_types=filtered_statuses, + requests=filtered_request_times, + first_iter_times=filtered_first_iter_times, + iter_counts=filtered_iter_counts, + ) + + @classmethod + def _calculate_tokens_per_second( + cls, + statuses: list[Literal["successful", "incomplete", "error"]], + requests: list[GenerativeRequestStats], + ) -> StatusDistributionSummary: + filtered_statuses = [] + filtered_request_times = [] + filtered_first_iter_times = [] + filtered_iter_counts = [] + filtered_first_iter_counts = [] + + for status, request in zip(statuses, requests): + if not all_defined(request.tokens_per_second): + continue + + filtered_statuses.append(status) + filtered_request_times.append( + ( + request.scheduler_info.request_timings.request_start, + request.scheduler_info.request_timings.request_end, + ) + ) + filtered_first_iter_times.append( + request.scheduler_info.request_timings.first_iteration + ) + filtered_iter_counts.append(request.output_tokens - 1) + filtered_first_iter_counts.append(request.prompt_tokens + 1) + + return StatusDistributionSummary.from_iterable_request_times( + request_types=filtered_statuses, + requests=filtered_request_times, + first_iter_times=filtered_first_iter_times, + iter_counts=filtered_iter_counts, + first_iter_counts=filtered_first_iter_counts, ) - return len(self.processor.tokenize(value)) diff --git a/src/guidellm/benchmark/benchmark.py b/src/guidellm/benchmark/benchmark.py deleted file mode 100644 index 02eea02b..00000000 --- a/src/guidellm/benchmark/benchmark.py +++ /dev/null @@ -1,837 +0,0 @@ -import random -import uuid -from typing import Any, Literal, Optional, TypeVar, Union - -from pydantic import Field, computed_field - -from guidellm.benchmark.profile import ( - AsyncProfile, - ConcurrentProfile, - Profile, - SweepProfile, - SynchronousProfile, - ThroughputProfile, -) -from guidellm.objects import ( - StandardBaseModel, - StatusBreakdown, - StatusDistributionSummary, -) -from guidellm.request import ( - GenerativeRequestLoaderDescription, - RequestLoaderDescription, -) -from guidellm.scheduler import ( - AsyncConstantStrategy, - AsyncPoissonStrategy, - ConcurrentStrategy, - GenerativeRequestsWorkerDescription, - SchedulerRequestInfo, - SchedulingStrategy, - SynchronousStrategy, - ThroughputStrategy, - WorkerDescription, -) - -__all__ = [ - "Benchmark", - "BenchmarkArgs", - "BenchmarkMetrics", - "BenchmarkRunStats", - "BenchmarkT", - "GenerativeBenchmark", - "GenerativeMetrics", - "GenerativeTextErrorStats", - 
"GenerativeTextResponseStats", - "StatusBreakdown", -] - - -class BenchmarkArgs(StandardBaseModel): - """ - A serializable model representing the arguments used to specify a benchmark run - and how data was collected for it. - """ - - profile: Union[ - AsyncProfile, - SweepProfile, - ConcurrentProfile, - ThroughputProfile, - SynchronousProfile, - Profile, - ] = Field( - description=( - "The profile used for the entire benchmark run that the strategy for " - "this benchmark was pulled from." - ), - discriminator="type_", - ) - strategy_index: int = Field( - description=( - "The index of the strategy in the profile that was used for this benchmark." - ) - ) - strategy: Union[ - ConcurrentStrategy, - SchedulingStrategy, - ThroughputStrategy, - SynchronousStrategy, - AsyncPoissonStrategy, - AsyncConstantStrategy, - SchedulingStrategy, - ] = Field( - description="The scheduling strategy used to run this benchmark. ", - discriminator="type_", - ) - max_number: Optional[int] = Field( - description="The maximum number of requests to run for this benchmark, if any." - ) - max_duration: Optional[float] = Field( - description="The maximum duration in seconds to run this benchmark, if any." - ) - warmup_number: Optional[int] = Field( - description=( - "The number of requests to run for the warmup phase of this benchmark, " - "if any. These are requests that were not included in the final results." - ) - ) - warmup_duration: Optional[float] = Field( - description=( - "The duration in seconds to run for the warmup phase of this benchmark, " - "if any. These are requests that were not included in the final results." - ) - ) - cooldown_number: Optional[int] = Field( - description=( - "The number of requests to run for the cooldown phase of this benchmark, " - "if any. These are requests that were not included in the final results." - ) - ) - cooldown_duration: Optional[float] = Field( - description=( - "The duration in seconds to run for the cooldown phase of this benchmark, " - "if any. These are requests that were not included in the final results." - ) - ) - - -class BenchmarkRunStats(StandardBaseModel): - """ - A serializable model representing the run process statistics for the - entire benchmark run across all requests including warmup and cooldown. - """ - - start_time: float = Field( - description="The start time of the benchmark run.", - ) - end_time: float = Field( - description="The end time of the benchmark run.", - ) - requests_made: StatusBreakdown[int, int, int, int] = Field( - description=( - "The number of requests made for the benchmark run broken down by " - "status including successful, incomplete, errored, and the sum of all three" - ) - ) - queued_time_avg: float = Field( - description=( - "The average time spent in the queue for each request in the benchmark " - "run until it was dequeued by a worker." - ) - ) - scheduled_time_delay_avg: float = Field( - description=( - "The average time delay between when a request was dequeued and when it " - "was scheduled to be processed by a worker in the benchmark run. " - "This should be as close to 0 as possible, any additional time is " - "overheads from the system or the worker." - ) - ) - scheduled_time_sleep_avg: float = Field( - description=( - "The average time spent sleeping til the desired start time was reached " - "after being scheduled by the worker in the benchmark run." 
- ) - ) - worker_start_delay_avg: float = Field( - description=( - "The average time delay between when a request was scheduled and when " - "the worker started processing it in the benchmark run. " - "This should be as close to 0 as possible, any additional time is " - "overheads from the system or the worker." - ) - ) - worker_time_avg: float = Field( - description=( - "The average time taken by the worker to process each request in the " - "benchmark run. This includes the time to generate the response and " - "any additional processing time." - ) - ) - worker_start_time_targeted_delay_avg: float = Field( - description=( - "The average time delay between when a request was targeted to start " - "and when the worker actually started processing it in the benchmark " - "run. For async strategies, this represents delays from the ideal " - "system. For sync strategies, since those are doubled in queue, " - "this should be as close to the time for a request to be processed " - "as possible. Any additional time is overhead from the system or " - "the worker." - ) - ) - request_start_time_delay_avg: float = Field( - description=( - "The average time delay between the actual request being made " - "and the time the worker started on the request for all requests " - "that completed within the benchmark run. This time should be as close " - "to 0 as possible, any additional time is overhead from the system or " - "the worker." - ) - ) - request_start_time_targeted_delay_avg: float = Field( - description=( - "The average time delay between when the targeted start time and " - "the actual start time for each request in the benchmark run. " - "For async strategies, this represents delays from the ideal " - "system. For sync strategies, this should be as close to the " - "time for a request to be processed as possible. Any additional " - "time is overhead from the system or the worker." - ) - ) - request_time_delay_avg: float = Field( - description=( - "The average time delay between the total request time and the " - "worker time. This should be as close to 0 as possible, any additional " - "time is overhead from the system or the worker. " - ) - ) - request_time_avg: float = Field( - description=( - "The average time spent processing all requests in the benchmark run. " - "This is the time from when the actual request was started to when " - "it was completed." - ) - ) - - -class BenchmarkMetrics(StandardBaseModel): - """ - A serializable model representing the metrics for a benchmark run. - """ - - requests_per_second: StatusDistributionSummary = Field( - description="The distribution of requests per second for the benchmark.", - ) - request_concurrency: StatusDistributionSummary = Field( - description="The distribution of requests concurrency for the benchmark.", - ) - - -class Benchmark(StandardBaseModel): - """ - The base serializable model representing a benchmark run and its results. - Specific benchmarker implementations should extend this model to include - additional information or metadata as needed. - - Note, requests_per_second and request_concurrency are kept at this level - and are expected to be populated by the subclass implementation to ensure - the logic for Profiles can include more complicated logic for determining - what rates and concurrency values to use for subsequent strategies. 
- """ - - type_: Literal["benchmark"] = "benchmark" - id_: str = Field( - default_factory=lambda: str(uuid.uuid4()), - description="The unique identifier for the benchmark.", - ) - run_id: str = Field( - description=( - "The unique identifier for the encompasing benchmark run that this " - "benchmark was a part of." - ) - ) - args: BenchmarkArgs = Field( - description=( - "The arguments used to specify how to run the benchmark and collect data." - ) - ) - run_stats: BenchmarkRunStats = Field( - description=( - "The process statistics for the entire benchmark run across all requests." - ) - ) - worker: Union[WorkerDescription] = Field( - description=( - "The description and specifics for the worker used to resolve requests " - "for this benchmark." - ), - ) - request_loader: Union[RequestLoaderDescription] = Field( - description=( - "The description and specifics for the request loader used to create " - "requests for this benchmark." - ), - ) - extras: dict[str, Any] = Field( - description=( - "Any additional information or metadata that was passed for this benchmark." - ) - ) - metrics: BenchmarkMetrics = Field( - description=( - "The metrics for the benchmark run represented as a distribution of " - "various per-request statistics." - ), - ) - - -BenchmarkT = TypeVar("BenchmarkT", bound=Benchmark) - - -class GenerativeTextResponseStats(StandardBaseModel): - """ - A serializable model representing the request values, response values, and - statistics for a generative text response. - """ - - type_: Literal["generative_text_response"] = "generative_text_response" - request_id: Optional[str] = Field( - description="The unique identifier for the request.", - ) - request_type: Literal["text_completions", "chat_completions"] = Field( - description="The type of request made to the generative backend." - ) - scheduler_info: SchedulerRequestInfo = Field( - description=( - "The info about the request from the scheduler about how it was run." - ), - ) - prompt: str = Field( - description="The text prompt used for the generative request.", - ) - output: str = Field( - description="The generated text output from the generative request.", - ) - prompt_tokens: int = Field( - description="The number of tokens in the prompt text.", - ) - output_tokens: int = Field( - description="The number of tokens in the generated output text.", - ) - start_time: float = Field( - description="The time the request started.", - ) - end_time: float = Field( - description="The time the request ended.", - ) - first_token_time: float = Field( - description="The time the first token was received.", - ) - last_token_time: float = Field( - description="The time the last token was received.", - ) - - @computed_field # type: ignore[misc] - @property - def request_latency(self) -> float: - """ - :return: The duration of the request in seconds from the start to the end. - """ - return self.end_time - self.start_time - - @computed_field # type: ignore[misc] - @property - def time_to_first_token_ms(self) -> float: - """ - :return: The time in milliseconds from the start of the request to the first - token received. - """ - return 1000 * (self.first_token_time - self.start_time) - - @computed_field # type: ignore[misc] - @property - def time_per_output_token_ms(self) -> float: - """ - :return: The average time in milliseconds per output token generated. - This includes the time to generate the first token and all other tokens. 
- """ - if self.output_tokens == 0: - return 0.0 - - return ( - 1000 * (self.last_token_time - self.first_token_time) / self.output_tokens - ) - - @computed_field # type: ignore[misc] - @property - def inter_token_latency_ms(self) -> float: - """ - :return: The average time in milliseconds between generating tokens in the - output text. Note, does not include the time to generate the first token. - """ - if self.output_tokens <= 1: - return 0.0 - - return ( - 1000 - * (self.last_token_time - self.first_token_time) - / (self.output_tokens - 1) - ) - - @computed_field # type: ignore[misc] - @property - def tokens_per_second(self) -> float: - """ - :return: The average number of tokens generated per second in the prompt and - output text. - """ - if (latency := self.request_latency) == 0.0: - return 0.0 - - return (self.prompt_tokens + self.output_tokens) / latency - - @computed_field # type: ignore[misc] - @property - def output_tokens_per_second(self) -> float: - """ - :return: The average number of output tokens generated per second. - """ - if (latency := self.request_latency) == 0.0: - return 0.0 - - return self.output_tokens / latency - - -class GenerativeTextErrorStats(GenerativeTextResponseStats): - """ - A serializable model representing the request values, response values, and - statistics for a generative text response that errored. - Extends and overrides the GenerativeTextResponseStats model to include the - error message and optional properties given the error occurred. - """ - - type_: Literal["generative_text_error"] = "generative_text_error" # type: ignore[assignment] - error: str = Field( - description=( - "The error message for the error that occurred while making the request." - ) - ) - output: Optional[str] = Field( # type: ignore[assignment] - default=None, - description=( - "The generated text output from the generative request, if any, " - "before the error occurred." - ), - ) - first_token_time: Optional[float] = Field( # type: ignore[assignment] - default=None, - description=( - "The time the first token was received, if any, before the error occurred." - ), - ) - last_token_time: Optional[float] = Field( # type: ignore[assignment] - default=None, - description=( - "The time the last token was received, if any, before the error occurred." - ), - ) - - @computed_field # type: ignore[misc] - @property - def time_to_first_token_ms(self) -> Optional[float]: # type: ignore[override] - """ - :return: The time in milliseconds from the start of the request to the first - token received. None if the first token was not received. - """ - if self.first_token_time is None: - return None - - return super().time_to_first_token_ms - - @computed_field # type: ignore[misc] - @property - def time_per_output_token_ms(self) -> Optional[float]: # type: ignore[override] - """ - :return: The average time in milliseconds per output token generated. - This includes the time to generate the first token and all other tokens. - None if the output_tokens is None or 0. - """ - if ( - self.output_tokens is None - or self.output_tokens == 0 - or self.first_token_time is None - or self.last_token_time is None - ): - return None - - return super().time_per_output_token_ms - - @computed_field # type: ignore[misc] - @property - def inter_token_latency_ms(self) -> Optional[float]: # type: ignore[override] - """ - :return: The average time in milliseconds between generating tokens in the - output text. Note, does not include the time to generate the first token. 
- None if there were no output_tokens or the first token was not received. - """ - if ( - self.output_tokens is None - or self.first_token_time is None - or self.last_token_time is None - ): - return None - - return super().inter_token_latency_ms - - @computed_field # type: ignore[misc] - @property - def output_tokens_per_second(self) -> Optional[float]: # type: ignore[override] - """ - :return: The average number of tokens generated per second in the output text. - Note, does not include the time to generate the first token. None if there - were no output_tokens or the first token was not received. - """ - if self.inter_token_latency_ms is None: - return None - - return super().output_tokens_per_second - - -class GenerativeMetrics(BenchmarkMetrics): - """ - A serializable model representing the metrics for a generative benchmark run. - """ - - request_latency: StatusDistributionSummary = Field( - description="The distribution of latencies for the completed requests.", - ) - prompt_token_count: StatusDistributionSummary = Field( - description=( - "The distribution of token counts in the prompts for completed, " - "errored, and all requests." - ) - ) - output_token_count: StatusDistributionSummary = Field( - description=( - "The distribution of token counts in the outputs for completed, " - "errored, and all requests." - ) - ) - time_to_first_token_ms: StatusDistributionSummary = Field( - description=( - "The distribution of latencies to receiving the first token in " - "milliseconds for completed, errored, and all requests." - ), - ) - time_per_output_token_ms: StatusDistributionSummary = Field( - description=( - "The distribution of latencies per output token in milliseconds for " - "completed, errored, and all requests. " - "This includes the time to generate the first token and all other tokens." - ), - ) - inter_token_latency_ms: StatusDistributionSummary = Field( - description=( - "The distribution of latencies between tokens in milliseconds for " - "completed, errored, and all requests." - ), - ) - output_tokens_per_second: StatusDistributionSummary = Field( - description=( - "The distribution of output tokens per second for completed, " - "errored, and all requests." - ), - ) - tokens_per_second: StatusDistributionSummary = Field( - description=( - "The distribution of tokens per second, including prompt and output tokens " - "for completed, errored, and all requests." - ), - ) - - -class GenerativeBenchmark(Benchmark): - """ - A serializable model representing a benchmark run and its results for generative - requests and responses. Includes the completed and errored requests, the start - and end times for the benchmark, and the statistics for the requests and responses. - """ - - type_: Literal["generative_benchmark"] = "generative_benchmark" # type: ignore[assignment] - start_time: float = Field( - description="The start time of the first request for the benchmark.", - ) - end_time: float = Field( - description="The end time of the last request for the benchmark.", - ) - - @computed_field # type: ignore[misc] - @property - def duration(self) -> float: - """ - :return: The duration of the benchmark in seconds from the start of the - first request to the end of the last request. - """ - return self.end_time - self.start_time - - worker: GenerativeRequestsWorkerDescription = Field( - description=( - "The description and specifics for the worker used to resolve requests " - "for this benchmark." 
- ), - ) - request_loader: GenerativeRequestLoaderDescription = Field( - description=( - "The description and specifics for the request loader used to create " - "requests for this benchmark." - ), - ) - metrics: GenerativeMetrics = Field( - description=( - "The metrics for the benchmark run represented as a distribution of " - "various per-request statistics." - ), - ) - # Output is ordered so keep the requests at the end for better readability in files - request_totals: StatusBreakdown[int, int, int, int] = Field( - description=( - "The number of requests made for the benchmark broken down by status " - "including successful, incomplete, errored, and the sum of all three" - ) - ) - request_samples: Optional[StatusBreakdown[int, int, int, None]] = Field( - description=( - "The number of requests that were randomly sampled for " - "the benchmark. None if no sampling was applied." - ), - default=None, - ) - requests: StatusBreakdown[ - list[GenerativeTextResponseStats], - list[GenerativeTextErrorStats], - list[GenerativeTextErrorStats], - None, - ] = Field( - description=( - "The breakdown of requests for the benchmark run including successful, " - "incomplete, and errored requests." - ), - ) - - def set_sample_size(self, sample_size: Optional[int]) -> "GenerativeBenchmark": - """ - Set the sample size for the benchmark. This will randomly sample the - requests for each status type to the given sample size or the maximum - number of requests for that status type, whichever is smaller. - This is applied to requests.successful, requests.errored, and - requests.incomplete. - If None, no sampling is applied and the state is kept. - - :param sample_size: The number of requests to sample for each status type. - :return: The benchmark with the sampled requests. - :raises ValueError: If the sample size is invalid. - """ - - if sample_size is not None: - if sample_size < 0 or not isinstance(sample_size, int): - raise ValueError( - f"Sample size must be non-negative integer, given {sample_size}" - ) - - sample_size = min(sample_size, len(self.requests.successful)) - error_sample_size = min(sample_size, len(self.requests.errored)) - incomplete_sample_size = min(sample_size, len(self.requests.incomplete)) - - self.requests.successful = random.sample( - self.requests.successful, sample_size - ) - self.requests.errored = random.sample( - self.requests.errored, error_sample_size - ) - self.requests.incomplete = random.sample( - self.requests.incomplete, incomplete_sample_size - ) - self.request_samples = StatusBreakdown( - successful=len(self.requests.successful), - incomplete=len(self.requests.incomplete), - errored=len(self.requests.errored), - ) - - return self - - @staticmethod - def from_stats( - run_id: str, - successful: list[GenerativeTextResponseStats], - incomplete: list[GenerativeTextErrorStats], - errored: list[GenerativeTextErrorStats], - args: BenchmarkArgs, - run_stats: BenchmarkRunStats, - worker: GenerativeRequestsWorkerDescription, - requests_loader: GenerativeRequestLoaderDescription, - extras: Optional[dict[str, Any]], - ) -> "GenerativeBenchmark": - """ - Create a GenerativeBenchmark instance from the given statistics and metadata. - Given the completed and errored requests, the benchmark will fill in the - remaining statistics for the various metrics required for a benchmark. - This is the preferred method for creating a GenerativeBenchmark instance - to ensure all statistics are properly calculated and populated. - - :param run_id: The unique identifier for the benchmark run. 
- :param completed: The list of completed requests. - :param errored: The list of errored requests. - :param args: The arguments used to specify how to run the benchmark - and collect data. - :param run_stats: The process statistics for the entire benchmark run across - all requests. - :param worker: The description and specifics for the worker used to resolve - requests. - :param requests_loader: The description and specifics for the request loader - used to create requests. - :param extras: Any additional information or metadata that was passed for - this benchmark. - :return: A GenerativeBenchmark instance with the given statistics and metadata - populated and calculated - """ - total = successful + incomplete + errored - total_types: list[Literal["successful", "incomplete", "error"]] = [ - *["successful"] * len(successful), # type: ignore[list-item] - *["incomplete"] * len(incomplete), # type: ignore[list-item] - *["error"] * len(errored), # type: ignore[list-item] - ] - start_time = min(req.start_time for req in total) - end_time = max(req.end_time for req in total) - - total_with_prompt, total_types_with_prompt = ( - zip(*filtered) - if ( - filtered := list( - filter(lambda val: bool(val[0].prompt), zip(total, total_types)) - ) - ) - else ([], []) - ) - total_with_output_first, total_types_with_output_first = ( - zip(*filtered) - if ( - filtered := list( - filter( - lambda val: bool(val[0].output_tokens > 0), - zip(total, total_types), - ) - ) - ) - else ([], []) - ) - total_with_output_multi, total_types_with_output_multi = ( - zip(*filtered) - if ( - filtered := list( - filter( - lambda val: bool(val[0].output_tokens > 1), - zip(total, total_types), - ) - ) - ) - else ([], []) - ) - - return GenerativeBenchmark( - run_id=run_id, - args=args, - run_stats=run_stats, - extras=extras or {}, - start_time=start_time, - end_time=end_time, - worker=worker, - request_loader=requests_loader, - metrics=GenerativeMetrics( - requests_per_second=StatusDistributionSummary.from_request_times( - request_types=total_types, - requests=[(req.start_time, req.end_time) for req in total], - distribution_type="rate", - ), - request_concurrency=StatusDistributionSummary.from_request_times( - request_types=total_types, - requests=[(req.start_time, req.end_time) for req in total], - distribution_type="concurrency", - ), - request_latency=StatusDistributionSummary.from_values( - value_types=total_types, - values=[req.request_latency for req in total], - ), - prompt_token_count=StatusDistributionSummary.from_values( - value_types=list(total_types_with_prompt), - values=[req.prompt_tokens for req in total_with_prompt], - ), - output_token_count=StatusDistributionSummary.from_values( - value_types=list(total_types_with_output_first), - values=[req.output_tokens for req in total_with_output_first], - ), - time_to_first_token_ms=StatusDistributionSummary.from_values( - value_types=list(total_types_with_output_first), - values=[ - req.time_to_first_token_ms or 0 - for req in total_with_output_first - ], - ), - time_per_output_token_ms=StatusDistributionSummary.from_values( - value_types=list(total_types_with_output_first), - values=[ - req.time_per_output_token_ms or 0 - for req in total_with_output_first - ], - weights=[req.output_tokens for req in total_with_output_first], - ), - inter_token_latency_ms=StatusDistributionSummary.from_values( - value_types=list(total_types_with_output_multi), - values=[ - req.inter_token_latency_ms or 0 - for req in total_with_output_multi - ], - weights=[req.output_tokens - 1 for 
req in total_with_output_multi], - ), - output_tokens_per_second=StatusDistributionSummary.from_iterable_request_times( - request_types=list(total_types_with_output_first), - requests=[ - (req.start_time, req.end_time) - for req in total_with_output_first - ], - first_iter_times=[ - req.first_token_time or req.start_time - for req in total_with_output_first - ], - iter_counts=[req.output_tokens for req in total_with_output_first], - ), - tokens_per_second=StatusDistributionSummary.from_iterable_request_times( - request_types=list(total_types_with_output_first), - requests=[ - (req.start_time, req.end_time) - for req in total_with_output_first - ], - first_iter_times=[ - req.first_token_time or req.start_time - for req in total_with_output_first - ], - iter_counts=[req.output_tokens for req in total_with_output_first], - first_iter_counts=[ - # prompt tokens + first token - req.prompt_tokens + 1 - for req in total_with_output_first - ], - ), - ), - request_totals=StatusBreakdown( - successful=len(successful), - incomplete=len(incomplete), - errored=len(errored), - total=len(total), - ), - requests=StatusBreakdown( - successful=successful, - incomplete=incomplete, - errored=errored, - ), - ) diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py index 0e34e322..ae591c23 100644 --- a/src/guidellm/benchmark/benchmarker.py +++ b/src/guidellm/benchmark/benchmarker.py @@ -1,334 +1,266 @@ -import time +""" +Benchmark execution orchestration and lifecycle management. + +Provides the core benchmarking engine that coordinates request scheduling, +data aggregation, and result compilation across different execution strategies +and environments. + +Classes: + Benchmarker: Abstract benchmark orchestrator for request processing workflows. + +Type Variables: + BenchmarkT: Generic benchmark result type. + RequestT: Generic request object type. + RequestTimingsT: Generic request timing object type. + ResponseT: Generic response object type. 
+""" + +from __future__ import annotations + import uuid -from abc import ABC, abstractmethod -from collections.abc import AsyncGenerator, Iterable -from pathlib import Path +from abc import ABC +from collections.abc import AsyncIterator, Iterable from typing import ( Any, Generic, - Literal, - Optional, - Union, ) -from pydantic import Field -from transformers import PreTrainedTokenizerBase # type: ignore # noqa: PGH003 - -from guidellm.backend import Backend, ResponseSummary from guidellm.benchmark.aggregator import ( - AggregatorT, - BenchmarkT, - GenerativeBenchmarkAggregator, + Aggregator, + AggregatorState, + CompilableAggregator, ) -from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark +from guidellm.benchmark.objects import BenchmarkerDict, BenchmarkT, SchedulerDict from guidellm.benchmark.profile import Profile -from guidellm.objects import StandardBaseModel -from guidellm.request import ( - GenerationRequest, - GenerativeRequestLoaderDescription, - RequestLoaderDescription, +from guidellm.scheduler import ( + BackendInterface, + Constraint, + Environment, + NonDistributedEnvironment, RequestT, ResponseT, -) -from guidellm.scheduler import ( - GenerativeRequestsWorker, - RequestsWorker, Scheduler, - SchedulerRequestResult, + SchedulerState, SchedulingStrategy, ) +from guidellm.utils import InfoMixin, ThreadSafeSingletonMixin +from guidellm.utils.pydantic_utils import StandardBaseDict -__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"] +__all__ = ["Benchmarker"] -class BenchmarkerResult( - StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT] +class Benchmarker( + Generic[BenchmarkT, RequestT, ResponseT], + ABC, + ThreadSafeSingletonMixin, ): - type_: Literal[ - "run_start", - "run_complete", - "scheduler_start", - "scheduler_update", - "scheduler_complete", - "benchmark_compiled", - ] - start_time: float - end_number: int - profile: Profile - current_index: int - current_strategy: Optional[SchedulingStrategy] = None - current_aggregator: Optional[AggregatorT] = None - current_benchmark: Optional[BenchmarkT] = None - current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None - - -class BenchmarkerStrategyLimits(StandardBaseModel): - requests_loader_size: Optional[int] = Field( - description="Size of the request loader.", - ) - max_number_per_strategy: Optional[int] = Field( - description="Maximum number of requests to process per strategy.", - ge=0, - ) - max_duration_per_strategy: Optional[float] = Field( - description="Maximum duration (in seconds) to process requests per strategy.", - ge=0, - ) - warmup_percent_per_strategy: Optional[float] = Field( - description="Percentage of requests to use for warmup.", - ge=0, - le=1, - ) - cooldown_percent_per_strategy: Optional[float] = Field( - description="Percentage of requests to use for cooldown.", - ge=0, - le=1, - ) - - @property - def max_number(self) -> Optional[int]: - if self.max_number_per_strategy is not None: - return self.max_number_per_strategy - - if self.requests_loader_size is not None: - return self.requests_loader_size - - return None - - @property - def max_duration(self) -> Optional[float]: - return self.max_duration_per_strategy + """ + Abstract benchmark orchestrator for request processing workflows. 
- @property - def warmup_number(self) -> Optional[int]: - if self.warmup_percent_per_strategy is None or self.max_number is None: - return None + Coordinates the execution of benchmarking runs across different scheduling + strategies, aggregating metrics and compiling results. Manages the complete + benchmark lifecycle from request submission through result compilation. - return int(self.warmup_percent_per_strategy * self.max_number) - - @property - def warmup_duration(self) -> Optional[float]: - if self.warmup_percent_per_strategy is None or self.max_duration is None: - return None - - return self.warmup_percent_per_strategy * self.max_duration - - @property - def cooldown_number(self) -> Optional[int]: - if self.cooldown_percent_per_strategy is None or self.max_number is None: - return None - - return int(self.cooldown_percent_per_strategy * self.max_number) - - @property - def cooldown_duration(self) -> Optional[float]: - if self.cooldown_percent_per_strategy is None or self.max_duration is None: - return None - - return self.cooldown_percent_per_strategy * self.max_duration - - -class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC): - def __init__( - self, - worker: RequestsWorker[RequestT, ResponseT], - request_loader: Iterable[RequestT], - requests_loader_description: RequestLoaderDescription, - benchmark_save_extras: Optional[dict[str, Any]] = None, - ): - self.worker = worker - self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler( - worker=worker, request_loader=request_loader - ) - self.requests_loader_description = requests_loader_description - self.benchmark_save_extras = benchmark_save_extras + Implements thread-safe singleton pattern to ensure consistent state across + concurrent benchmark operations. + """ async def run( self, + requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]], + backend: BackendInterface[RequestT, ResponseT], profile: Profile, - max_number_per_strategy: Optional[int], - max_duration_per_strategy: Optional[float], - warmup_percent_per_strategy: Optional[float], - cooldown_percent_per_strategy: Optional[float], - ) -> AsyncGenerator[ - BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None + benchmark_class: type[BenchmarkT], + benchmark_aggregators: dict[ + str, + Aggregator[ResponseT, RequestT] | CompilableAggregator[ResponseT, RequestT], + ], + environment: Environment | None = None, + ) -> AsyncIterator[ + tuple[ + AggregatorState | None, + BenchmarkT | None, + SchedulingStrategy, + SchedulerState | None, + ] ]: - try: - requests_loader_size = len(self.scheduler.request_loader) # type: ignore[arg-type] - except Exception: # noqa: BLE001 - requests_loader_size = None - - strategy_limits = BenchmarkerStrategyLimits( - requests_loader_size=requests_loader_size, - max_number_per_strategy=max_number_per_strategy, - max_duration_per_strategy=max_duration_per_strategy, - warmup_percent_per_strategy=warmup_percent_per_strategy, - cooldown_percent_per_strategy=cooldown_percent_per_strategy, - ) - start_time = time.time() - end_number = len(profile.strategy_types) - current_index = -1 - run_id = str(uuid.uuid4()) - - yield BenchmarkerResult( - type_="run_start", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - current_strategy=None, - current_aggregator=None, - current_benchmark=None, - current_result=None, - ) - - while scheduling_strategy := profile.next_strategy(): - current_index += 1 - aggregator = self.create_benchmark_aggregator( - 
run_id=run_id, + """ + Execute benchmark runs across multiple scheduling strategies. + + Orchestrates the complete benchmark workflow: iterates through scheduling + strategies from the profile, executes requests through the scheduler, + aggregates metrics, and compiles final benchmark results. + + :param requests: Request datasets for processing across strategies. + :param backend: Backend interface for request processing. + :param profile: Benchmark profile defining strategies and constraints. + :param environment: Execution environment for coordination. + :param benchmark_aggregators: Metric aggregation functions by name. + :param benchmark_class: Class for constructing final benchmark objects. + :yield: Tuples of (metrics_update, benchmark_result, strategy, state). + :raises Exception: If benchmark execution or compilation fails. + """ + with self.thread_lock: + if environment is None: + environment = NonDistributedEnvironment() + + run_id = str(uuid.uuid4()) + strategies_generator = profile.strategies_generator() + strategy, constraints = next(strategies_generator) + + while strategy is not None: + yield None, None, strategy, None + aggregators_state = { + key: AggregatorState() for key in benchmark_aggregators + } + + async for ( + response, + request, + request_info, + scheduler_state, + ) in Scheduler[RequestT, ResponseT]().run( + requests=requests, + backend=backend, + strategy=strategy, + env=environment, + **constraints, + ): + aggregators_update = AggregatorState() + for key, aggregator in benchmark_aggregators.items(): + update = aggregator( + aggregators_state[key], + response, + request, + request_info, + scheduler_state, + ) + if update: + aggregators_update.update(update) + yield aggregators_update, None, strategy, scheduler_state + + benchmark_kwargs = self._compile_benchmark_kwargs( + run_id=run_id, + run_index=len(profile.completed_strategies), + profile=profile, + requests=requests, + backend=backend, + environment=environment, + aggregators=benchmark_aggregators, + aggregators_state=aggregators_state, + strategy=strategy, + constraints=constraints, + scheduler_state=scheduler_state, + ) + benchmark = benchmark_class(**benchmark_kwargs) + yield None, benchmark, strategy, None + + try: + strategy, constraints = strategies_generator.send(benchmark) + except StopIteration: + strategy = None + constraints = None + + @classmethod + def _compile_benchmark_kwargs( + cls, + run_id: str, + run_index: int, + profile: Profile, + requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]], + backend: BackendInterface[RequestT, ResponseT], + environment: Environment, + aggregators: dict[ + str, + Aggregator[ResponseT, RequestT] | CompilableAggregator[ResponseT, RequestT], + ], + aggregators_state: dict[str, dict[str, Any]], + strategy: SchedulingStrategy, + constraints: dict[str, Any | dict[str, Any] | Constraint], + scheduler_state: SchedulerState | None, + ) -> dict[str, Any]: + """ + Compile benchmark construction parameters from execution results. + + Aggregates metadata from scheduler execution and compiles it into + structured parameters for benchmark object construction. + + :param run_id: Unique identifier for the benchmark run. + :param run_index: Index of this strategy in the benchmark profile. + :param profile: Benchmark profile containing strategy configuration. + :param requests: Request datasets used for the benchmark. + :param backend: Backend interface used for request processing. + :param environment: Execution environment for coordination. 
+ :param aggregators: Metric aggregation functions by name. + :param aggregators_state: Current state of metric aggregators. + :param strategy: Scheduling strategy that was executed. + :param constraints: Runtime constraints applied during execution. + :param scheduler_state: Final state of scheduler execution. + :return: Dictionary of parameters for benchmark object construction. + :raises ValueError: If aggregator output conflicts with existing keys. + """ + benchmark_kwargs = { + "run_id": run_id, + "run_index": run_index, + "scheduler": SchedulerDict( + strategy=strategy, + constraints={ + key: InfoMixin.extract_from_obj(val) + for key, val in constraints.items() + }, + state=scheduler_state, + ), + "benchmarker": BenchmarkerDict( profile=profile, - strategy_index=current_index, - strategy=scheduling_strategy, - limits=strategy_limits, + requests=InfoMixin.extract_from_obj(requests), + backend=backend.info, + environment=environment.info, + aggregators={ + key: InfoMixin.extract_from_obj(aggregator) + for key, aggregator in aggregators.items() + }, + ), + "env_args": StandardBaseDict(), + "extras": StandardBaseDict(), + } + + def _combine( + existing: dict[str, Any] | StandardBaseDict, + addition: dict[str, Any] | StandardBaseDict, + ) -> dict[str, Any] | StandardBaseDict: + if not isinstance(existing, (dict, StandardBaseDict)): + raise ValueError( + f"Existing value {existing} (type: {type(existing).__name__}) " + f"is not a valid type for merging." + ) + if not isinstance(addition, (dict, StandardBaseDict)): + raise ValueError( + f"Addition value {addition} (type: {type(addition).__name__}) " + f"is not a valid type for merging." + ) + + add_kwargs = ( + addition if isinstance(addition, dict) else addition.model_dump() ) - async for result in self.scheduler.run( - scheduling_strategy=scheduling_strategy, - max_number=max_number_per_strategy, - max_duration=max_duration_per_strategy, - ): - if result.type_ == "run_start": - yield BenchmarkerResult( - type_="scheduler_start", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - current_strategy=scheduling_strategy, - current_aggregator=aggregator, - current_benchmark=None, - current_result=None, - ) - elif result.type_ == "run_complete": - yield BenchmarkerResult( - type_="scheduler_complete", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - current_strategy=scheduling_strategy, - current_aggregator=aggregator, - current_benchmark=None, - current_result=None, - ) - elif isinstance(result, SchedulerRequestResult): - aggregator.add_result(result) + if isinstance(existing, dict): + return {**add_kwargs, **existing} - yield BenchmarkerResult( - type_="scheduler_update", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - current_strategy=scheduling_strategy, - current_aggregator=aggregator, - current_benchmark=None, - current_result=result, - ) - else: - raise ValueError(f"Unexpected result type: {type(result)}") + return existing.__class__(**{**add_kwargs, **existing.model_dump()}) - benchmark: BenchmarkT = aggregator.compile() - profile.completed_strategy( - average_rate=benchmark.metrics.requests_per_second.successful.mean, - average_concurrency=benchmark.metrics.request_concurrency.successful.mean, - ) - - yield BenchmarkerResult( - type_="benchmark_compiled", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - 
current_strategy=scheduling_strategy, - current_aggregator=None, - current_benchmark=benchmark, - current_result=None, - ) + for key, aggregator in aggregators.items(): + if not isinstance(aggregator, CompilableAggregator): + continue - yield BenchmarkerResult( - type_="run_complete", - start_time=start_time, - end_number=end_number, - profile=profile, - current_index=current_index, - current_strategy=None, - current_aggregator=None, - current_benchmark=None, - current_result=None, - ) + compiled = aggregator.compile(aggregators_state[key], scheduler_state) - @abstractmethod - def create_benchmark_aggregator( - self, - run_id: str, - profile: Profile, - strategy_index: int, - strategy: SchedulingStrategy, - limits: BenchmarkerStrategyLimits, - ) -> AggregatorT: ... - - -class GenerativeBenchmarker( - Benchmarker[ - GenerativeBenchmarkAggregator, - GenerativeBenchmark, - GenerationRequest, - ResponseSummary, - ], -): - def __init__( - self, - backend: Backend, - request_loader: Iterable[GenerationRequest], - request_loader_description: GenerativeRequestLoaderDescription, - benchmark_save_extras: Optional[dict[str, Any]] = None, - processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None, - processor_args: Optional[dict[str, Any]] = None, - ): - super().__init__( - worker=GenerativeRequestsWorker(backend), - request_loader=request_loader, - requests_loader_description=request_loader_description, - benchmark_save_extras=benchmark_save_extras, - ) - self.processor = processor - self.processor_args = processor_args + for field_name, field_val in compiled.items(): + if field_name in benchmark_kwargs: + # If the key already exists, merge the values + benchmark_kwargs[field_name] = _combine( + benchmark_kwargs[field_name], field_val + ) + else: + benchmark_kwargs[field_name] = field_val - def create_benchmark_aggregator( - self, - run_id: str, - profile: Profile, - strategy_index: int, - strategy: SchedulingStrategy, - limits: BenchmarkerStrategyLimits, - ) -> GenerativeBenchmarkAggregator: - return GenerativeBenchmarkAggregator( - run_id=run_id, - args=BenchmarkArgs( - profile=profile, - strategy_index=strategy_index, - strategy=strategy, - max_number=limits.max_number, - max_duration=limits.max_duration, - warmup_number=limits.warmup_number, - warmup_duration=limits.warmup_duration, - cooldown_number=limits.cooldown_number, - cooldown_duration=limits.cooldown_duration, - ), - worker_description=self.worker.description, # type: ignore[arg-type] - request_loader_description=self.requests_loader_description, # type: ignore[arg-type] - extras=self.benchmark_save_extras or {}, - processor=self.processor, - processor_args=self.processor_args, - ) + return benchmark_kwargs diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 2ef85c3e..60077ee8 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -1,23 +1,56 @@ +from __future__ import annotations + from collections.abc import Iterable from pathlib import Path -from typing import Any, Literal, Optional, Union +from typing import Any, Literal from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict from transformers import ( # type: ignore[import] PreTrainedTokenizerBase, ) -from guidellm.backend import Backend, BackendType -from guidellm.benchmark.benchmarker import GenerativeBenchmarker +from guidellm.backends import ( + Backend, + BackendType, + GenerationRequest, + GenerationResponse, +) +from guidellm.benchmark.aggregator import 
( + Aggregator, + CompilableAggregator, + GenerativeRequestsAggregator, + GenerativeStatsProgressAggregator, + SchedulerStatsAggregator, + SerializableAggregator, +) +from guidellm.benchmark.benchmarker import Benchmarker +from guidellm.benchmark.objects import GenerativeBenchmark, GenerativeBenchmarksReport from guidellm.benchmark.output import ( - GenerativeBenchmarksConsole, - GenerativeBenchmarksReport, + GenerativeBenchmarkerConsole, + GenerativeBenchmarkerOutput, +) +from guidellm.benchmark.profile import Profile, ProfileType +from guidellm.benchmark.progress import ( + BenchmarkerProgress, + BenchmarkerProgressGroup, ) -from guidellm.benchmark.profile import ProfileType, create_profile -from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario from guidellm.request import GenerativeRequestLoader -from guidellm.scheduler import StrategyType +from guidellm.scheduler import ( + ConstraintInitializer, + NonDistributedEnvironment, + StrategyType, +) +from guidellm.utils import Console, InfoMixin + +__all__ = [ + "benchmark_generative_text", + "benchmark_with_scenario", + "reimport_benchmarks_report", +] + + +_CURRENT_WORKING_DIR = Path.cwd() async def benchmark_with_scenario(scenario: Scenario, **kwargs): @@ -31,135 +64,250 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs): raise ValueError(f"Unsupported Scenario type {type(scenario)}") -async def benchmark_generative_text( +# @validate_call(config={"arbitrary_types_allowed": True}) +async def benchmark_generative_text( # noqa: C901 target: str, - backend_type: BackendType, - backend_args: Optional[dict[str, Any]], - model: Optional[str], - processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]], - processor_args: Optional[dict[str, Any]], - data: Union[ - str, - Path, - Iterable[Union[str, dict[str, Any]]], - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, - ], - data_args: Optional[dict[str, Any]], - data_sampler: Optional[Literal["random"]], - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, list[float]]], - max_seconds: Optional[float], - max_requests: Optional[int], - warmup_percent: Optional[float], - cooldown_percent: Optional[float], - output_path: Optional[Union[str, Path]], - output_extras: Optional[dict[str, Any]], - output_sampling: Optional[int], - random_seed: int, - show_progress: bool = True, - show_progress_scheduler_stats: bool = False, - output_console: bool = True, -) -> tuple[GenerativeBenchmarksReport, Optional[Path]]: - console = GenerativeBenchmarksConsole(enabled=show_progress) - console.print_line("Creating backend...") - backend = Backend.create( - backend_type, target=target, model=model, **(backend_args or {}) - ) - await backend.validate() - console.print_line( - f"Backend {backend_type} connected to {target} for model {backend.model}." 
- ) + data: ( + Iterable[str] + | Iterable[dict[str, Any]] + | Dataset + | DatasetDict + | IterableDataset + | IterableDatasetDict + | str + | Path + ), + profile: StrategyType | ProfileType | Profile, + rate: float | list[float] | None = None, + random_seed: int = 42, + # Backend configuration + backend: BackendType | Backend = "openai_http", + backend_kwargs: dict[str, Any] | None = None, + model: str | None = None, + # Data configuration + processor: str | Path | PreTrainedTokenizerBase | None = None, + processor_args: dict[str, Any] | None = None, + data_args: dict[str, Any] | None = None, + data_sampler: Literal["random"] | None = None, + # Output configuration + output_path: str | Path | None = _CURRENT_WORKING_DIR, + output_formats: ( + tuple[str, ...] + | list[str] + | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput] + | None + ) = ("console", "json", "html", "csv"), + # Updates configuration + progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None, + print_updates: bool = False, + # Aggregators configuration + add_aggregators: ( + dict[str, str | dict[str, Any] | Aggregator | CompilableAggregator] | None + ) = None, + warmup: float | None = None, + cooldown: float | None = None, + request_samples: int | None = 20, + # Constraints configuration + max_seconds: int | float | None = None, + max_requests: int | None = None, + max_errors: int | None = None, + max_error_rate: float | None = None, + max_global_error_rate: float | None = None, + **constraints: dict[str, ConstraintInitializer | Any], +) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]: + console = Console(quiet=not print_updates) - if processor is None: - processor = backend.model - - console.print_line("Creating request loader...") - request_loader = GenerativeRequestLoader( - data=data, - data_args=data_args, - processor=processor, - processor_args=processor_args, - shuffle=data_sampler == "random", - iter_type=( - "finite" # assume a finite dataset is our limit - if max_requests is None and max_seconds is None - else "infinite" # default to infinite so we don't run out of data - ), - random_seed=random_seed, - ) - unique_requests = request_loader.num_unique_items(raise_err=False) - console.print_line( - f"Created loader with {unique_requests} unique requests from {data}.\n\n" - if unique_requests > 0 - else f"Created loader with unknown number unique requests from {data}.\n\n" - ) + with console.print_update_step( + title=f"Initializing backend {backend}" + ) as console_step: + backend = ( + Backend.create( + backend, target=target, model=model, **(backend_kwargs or {}) + ) + if not isinstance(backend, Backend) + else backend + ) + console_step.update(f"{backend.__class__.__name__} backend initialized") + await backend.process_startup() + await backend.validate() + console_step.finish( + title=f"{backend.__class__.__name__} backend initialized", + details=backend.info, + status_level="success", + ) - profile = create_profile(rate_type=rate_type, rate=rate) - benchmarker = GenerativeBenchmarker( - backend=backend, - request_loader=request_loader, - request_loader_description=request_loader.description, - benchmark_save_extras=output_extras, - processor=processor, - processor_args=processor_args, - ) - progress = ( - GenerativeTextBenchmarkerProgressDisplay( - display_scheduler_stats=show_progress_scheduler_stats + with console.print_update_step(title="Resolving processor") as console_step: + if processor is not None: + console_step.finish( + title="Processor resolved", + 
details=f"Using processor '{processor}'", + status_level="success", + ) + elif model is not None: + console_step.finish( + title="Processor resolved", + details=f"Using model '{model}' as processor", + status_level="success", + ) + processor = model + else: + console_step.update( + title="Resolving processor from backend.default_model", + status_level="info", + ) + processor = await backend.default_model() + console_step.finish( + title="Processor resolved", + details=( + f"Using model '{processor}' from backend " + f"{backend.__class__.__name__} as processor" + ), + status_level="success", + ) + await backend.process_shutdown() + + with console.print_update_step( + title=f"Initializing request loader from {data}" + ) as console_step: + request_loader = GenerativeRequestLoader( + data=data, + data_args=data_args, + processor=processor, + processor_args=processor_args, + shuffle=data_sampler == "random", + random_seed=random_seed, + ) + unique_requests = request_loader.num_unique_items(raise_err=False) + console_step.finish( + title=( + f"Request loader initialized with {unique_requests} unique requests " + f"from {data}" + ), + details=InfoMixin.extract_from_obj(request_loader), + status_level="success", + ) + + with console.print_update_step( + title=f"Resolving profile {profile}" + ) as console_step: + for key, val in { + "max_seconds": max_seconds, + "max_requests": max_requests, + "max_errors": max_errors, + "max_error_rate": max_error_rate, + "max_global_error_rate": max_global_error_rate, + }.items(): + if val is not None: + constraints[key] = val + if not isinstance(profile, Profile): + profile = Profile.create( + rate_type=profile, + rate=rate, + random_seed=random_seed, + constraints={**constraints}, + ) + elif constraints: + raise ValueError( + "Constraints must be empty when providing a Profile instance. 
" + f"Provided constraints: {constraints} ; provided profile: {profile}" + ) + console_step.finish( + title=f"{profile.__class__.__name__} profile resolved", + details=InfoMixin.extract_from_obj(profile), + status_level="success", + ) + + with console.print_update_step( + title="Creating benchmark aggregators" + ) as console_step: + aggregators = { + "scheduler_stats": SchedulerStatsAggregator(), + "requests_progress": GenerativeStatsProgressAggregator(), + "requests": GenerativeRequestsAggregator( + request_samples=request_samples, + warmup=warmup, + cooldown=cooldown, + ), + **SerializableAggregator.resolve(add_aggregators or {}), + } + console_step.finish( + title="Benchmark aggregators created", + details={key: str(val) for key, val in aggregators.items()}, + status_level="success", + ) + + with console.print_update_step(title="Resolving output formats") as console_step: + output_formats = GenerativeBenchmarkerOutput.resolve( + output_formats=(output_formats or {}), output_path=output_path + ) + console_step.finish( + title="Output formats resolved", + details={key: str(val) for key, val in output_formats.items()}, + status_level="success", ) - if show_progress - else None + + progress_group = BenchmarkerProgressGroup( + instances=progress or [], enabled=bool(progress) ) report = GenerativeBenchmarksReport() + console.print_update( + title="Setup complete, starting benchmarks...", status="success" + ) + console.print("\n\n") - async for result in benchmarker.run( - profile=profile, - max_number_per_strategy=max_requests, - max_duration_per_strategy=max_seconds, - warmup_percent_per_strategy=warmup_percent, - cooldown_percent_per_strategy=cooldown_percent, + async for ( + _aggregator_update, + benchmark, + _strategy, + _scheduler_state, + ) in progress_group( + profile, + Benchmarker[ + GenerativeBenchmark, + GenerationRequest, + GenerationResponse, + ]().run( + requests=request_loader, + backend=backend, + profile=profile, + environment=NonDistributedEnvironment(), + benchmark_aggregators=aggregators, + benchmark_class=GenerativeBenchmark, + ), ): - if progress: - progress.update(result) - - if result.type_ == "benchmark_compiled": - if result.current_benchmark is None: - raise ValueError("Current benchmark is None") - report.benchmarks.append( - result.current_benchmark.set_sample_size(output_sampling) - ) + if benchmark: + report.benchmarks.append(benchmark) - if output_console: - console.benchmarks = report.benchmarks - console.print_full_report() + output_format_results = {} + for key, output in output_formats.items(): + output_result = await output.finalize(report) + output_format_results[key] = output_result - if output_path: - console.print_line("\nSaving benchmarks report...") - saved_path = report.save_file(output_path) - console.print_line(f"Benchmarks report saved to {saved_path}") - else: - saved_path = None - - console.print_line("\nBenchmarking complete.") + console.print("\n\n") + console.print_update( + title=f"Benchmarking complete, generated {len(report.benchmarks)} benchmark(s)", + status="success", + ) + for key, value in output_format_results.items(): + console.print_update(title=f" {key:<8}: {value}", status="debug") - return report, saved_path + return report, output_format_results -def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None: +def reimport_benchmarks_report(file: Path, output_path: Path | None) -> None: """ The command-line entry point for re-importing and displaying an existing benchmarks report. 
Can also specify an output path to save a copy of the report.
    Assumes the file provided exists.
    """
-    console = GenerativeBenchmarksConsole(enabled=True)
     report = GenerativeBenchmarksReport.load_file(file)
-    console.benchmarks = report.benchmarks
-    console.print_full_report()
+    console_output = GenerativeBenchmarkerConsole()
+    console_output.finalize(report)
+    console = Console()

     if output_path:
-        console.print_line("\nSaving benchmarks report...")
-        saved_path = report.save_file(output_path)
-        console.print_line(f"Benchmarks report saved to {saved_path}")
+        with console.print_update_step(
+            title=f"Saving benchmarks report to {output_path}..."
+        ) as console_step:
+            saved_path = report.save_file(output_path)
+            console_step.finish(title=f"Benchmarks report saved to {saved_path}")
diff --git a/src/guidellm/benchmark/objects.py b/src/guidellm/benchmark/objects.py
new file mode 100644
index 00000000..8afabba9
--- /dev/null
+++ b/src/guidellm/benchmark/objects.py
@@ -0,0 +1,473 @@
+"""
+Benchmark data models and metrics for performance measurement and analysis.
+
+Provides comprehensive data structures for capturing, storing, and analyzing
+benchmark results from scheduler executions. Includes timing measurements,
+token statistics, and performance metrics for generative AI workloads.
+
+Classes:
+    BenchmarkSchedulerStats: Scheduler timing and performance statistics.
+    BenchmarkMetrics: Core benchmark metrics and distributions.
+    BenchmarkRequestStats: Individual request processing statistics.
+    Benchmark: Base benchmark result container with generic metrics.
+    GenerativeRequestStats: Request statistics for generative AI workloads.
+    GenerativeMetrics: Comprehensive metrics for generative benchmarks.
+    GenerativeBenchmark: Complete generative benchmark results and analysis.
+    GenerativeBenchmarksReport: Container for multiple benchmark results.
+
+Type Variables:
+    BenchmarkMetricsT: Generic benchmark metrics type.
+    BenchmarkRequestStatsT: Generic request statistics type.
+    BenchmarkT: Generic benchmark container type.
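+
+Example (illustrative; assumes a report was previously saved to disk):
+    report = GenerativeBenchmarksReport.load_file("benchmarks.json")
+    print(len(report.benchmarks))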
+""" + +from __future__ import annotations + +import json +import uuid +from pathlib import Path +from typing import Any, ClassVar, Generic, Literal, TypeVar + +import yaml +from pydantic import Field, computed_field + +from guidellm.benchmark.profile import ( + Profile, +) +from guidellm.scheduler import ( + ScheduledRequestInfo, + SchedulerState, + SchedulingStrategy, +) +from guidellm.utils import ( + StandardBaseDict, + StandardBaseModel, + StatusBreakdown, + StatusDistributionSummary, +) + +__all__ = [ + "Benchmark", + "BenchmarkMetrics", + "BenchmarkSchedulerStats", + "BenchmarkT", + "GenerativeBenchmark", + "GenerativeBenchmarksReport", + "GenerativeMetrics", + "GenerativeRequestStats", +] + + +class BenchmarkSchedulerStats(StandardBaseDict): + """Scheduler timing and performance statistics.""" + + start_time: float = Field( + description="Unix timestamp when the benchmark run started" + ) + end_time: float = Field(description="Unix timestamp when the benchmark run ended") + requests_made: StatusBreakdown[int, int, int, int] = Field( + description="Request counts by status: successful, incomplete, errored, total" + ) + queued_time_avg: float = Field( + description="Avg time requests spent in the queue (seconds)" + ) + worker_resolve_start_delay_avg: float = Field( + description="Avg delay before worker begins resolving req after dequeue (sec)" + ) + worker_resolve_time_avg: float = Field( + description="Avg time for worker to resolve requests (seconds)" + ) + worker_resolve_end_delay_avg: float = Field( + description="Avg delay after request end till worker resolves (seconds)" + ) + finalized_delay_avg: float = Field( + description="Avg delay after resolve til finalized with in scheduler (sec)" + ) + worker_targeted_start_delay_avg: float = Field( + description="Avg delay from targeted start to actual worker start (seconds)" + ) + request_start_delay_avg: float = Field( + description="Avg delay after resolve til request start (seconds)" + ) + request_time_avg: float = Field(description="Avg request processing time (seconds)") + request_targeted_start_delay_avg: float = Field( + description="Avg delay from targeted start to actual request start" + ) + + +class SchedulerDict(StandardBaseDict): + """Scheduler configuration and execution state dictionary.""" + + strategy: SchedulingStrategy + constraints: dict[str, dict[str, Any]] + state: SchedulerState + + +class BenchmarkerDict(StandardBaseDict): + """Benchmarker configuration and component settings dictionary.""" + + profile: Profile + requests: dict[str, Any] + backend: dict[str, Any] + environment: dict[str, Any] + aggregators: dict[str, dict[str, Any]] + + +class BenchmarkMetrics(StandardBaseDict): + """Core benchmark metrics and statistical distributions.""" + + requests_per_second: StatusDistributionSummary = Field( + description="Distribution of requests per second across benchmark execution" + ) + request_concurrency: StatusDistributionSummary = Field( + description="Distribution of concurrent request counts during execution" + ) + request_latency: StatusDistributionSummary = Field( + description="Distribution of request latencies for completed requests" + ) + + +BenchmarkMetricsT = TypeVar("BenchmarkMetricsT", bound=BenchmarkMetrics) + + +class BenchmarkRequestStats(StandardBaseDict): + """Individual request processing statistics and scheduling metadata.""" + + scheduler_info: ScheduledRequestInfo = Field( + description="Scheduler metadata and timing information for the request" + ) + + +BenchmarkRequestStatsT = 
TypeVar("BenchmarkRequestStatsT", bound=BenchmarkRequestStats) + + +class Benchmark(StandardBaseDict, Generic[BenchmarkMetricsT, BenchmarkRequestStatsT]): + """Base benchmark result container with execution metadata.""" + + type_: Literal["benchmark"] = "benchmark" + id_: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="Unique identifier for this benchmark execution", + ) + run_id: str = Field( + description="Identifier for the benchmarker run containing this benchmark" + ) + run_index: int = Field( + description="Sequential index of this benchmark within the benchmarker run" + ) + scheduler: SchedulerDict = Field( + description="Scheduler configuration and execution state" + ) + benchmarker: BenchmarkerDict = Field( + description="Benchmarker configuration and component settings" + ) + env_args: StandardBaseDict = Field( + description="Environment arguments and runtime configuration" + ) + extras: StandardBaseDict = Field( + description="Additional metadata and custom benchmark parameters" + ) + run_stats: BenchmarkSchedulerStats = Field( + description="Scheduler timing and performance statistics" + ) + start_time: float = Field( + default=-1.0, description="Unix timestamp when the first request was initiated" + ) + end_time: float = Field( + default=-1.0, description="Unix timestamp when the last request completed" + ) + + @computed_field # type: ignore[misc] + @property + def duration(self) -> float: + """ + Benchmark execution duration in seconds. + + :return: Time elapsed from first request start to last request completion. + """ + return self.end_time - self.start_time + + metrics: BenchmarkMetricsT = Field( + description="Performance metrics and statistical distributions" + ) + request_totals: StatusBreakdown[int, int, int, int] = Field( + description="Request counts by status: successful, incomplete, errored, total" + ) + requests: StatusBreakdown[ + list[BenchmarkRequestStatsT], + list[BenchmarkRequestStatsT], + list[BenchmarkRequestStatsT], + None, + ] = Field( + description="Request details grouped by status: successful, incomplete, errored" + ) + + +BenchmarkT = TypeVar("BenchmarkT", bound=Benchmark) + + +class GenerativeRequestStats(BenchmarkRequestStats): + """Request statistics for generative AI text generation workloads.""" + + type_: Literal["generative_request_stats"] = "generative_request_stats" + request_id: str = Field(description="Unique identifier for the request") + request_type: Literal["text_completions", "chat_completions"] = Field( + description="Type of generative request: text or chat completion" + ) + prompt: str = Field(description="Input text prompt for generation") + request_args: dict[str, Any] = Field( + description="Generation parameters and configuration options" + ) + output: str | None = Field( + description="Generated text output, if request completed successfully" + ) + iterations: int = Field( + description="Number of processing iterations for the request" + ) + prompt_tokens: int | None = Field( + description="Number of tokens in the input prompt" + ) + output_tokens: int | None = Field( + description="Number of tokens in the generated output" + ) + + @computed_field # type: ignore[misc] + @property + def total_tokens(self) -> int | None: + """ + Total token count including prompt and output tokens. + + :return: Sum of prompt and output tokens, or None if either is unavailable. 
+ """ + if self.prompt_tokens is None and self.output_tokens is None: + return None + + return (self.prompt_tokens or 0) + (self.output_tokens or 0) + + @computed_field # type: ignore[misc] + @property + def request_latency(self) -> float | None: + """ + End-to-end request processing latency in seconds. + + :return: Duration from request start to completion, or None if unavailable. + """ + if ( + not self.scheduler_info.request_timings.request_end + or not self.scheduler_info.request_timings.request_start + ): + return None + + return ( + self.scheduler_info.request_timings.request_end + - self.scheduler_info.request_timings.request_start + ) + + @computed_field # type: ignore[misc] + @property + def time_to_first_token_ms(self) -> float | None: + """ + Time to first token generation in milliseconds. + + :return: Latency from request start to first token, or None if unavailable. + """ + if ( + not self.scheduler_info.request_timings.first_iteration + or not self.scheduler_info.request_timings.request_start + ): + return None + + return 1000 * ( + self.scheduler_info.request_timings.first_iteration + - self.scheduler_info.request_timings.request_start + ) + + @computed_field # type: ignore[misc] + @property + def time_per_output_token_ms(self) -> float | None: + """ + Average time per output token in milliseconds. + + Includes time for first token and all subsequent tokens. + + :return: Average milliseconds per output token, or None if unavailable. + """ + if ( + not self.scheduler_info.request_timings.request_start + or not self.scheduler_info.request_timings.last_iteration + or not self.output_tokens + ): + return None + + return ( + 1000 + * ( + self.scheduler_info.request_timings.last_iteration + - self.scheduler_info.request_timings.request_start + ) + / self.output_tokens + ) + + @computed_field # type: ignore[misc] + @property + def inter_token_latency_ms(self) -> float | None: + """ + Average inter-token latency in milliseconds. + + Measures time between token generations, excluding first token. + + :return: Average milliseconds between tokens, or None if unavailable. + """ + if ( + not self.scheduler_info.request_timings.first_iteration + or not self.scheduler_info.request_timings.last_iteration + or not self.output_tokens + or self.output_tokens <= 1 + ): + return None + + return ( + 1000 + * ( + self.scheduler_info.request_timings.last_iteration + - self.scheduler_info.request_timings.first_iteration + ) + / (self.output_tokens - 1) + ) + + @computed_field # type: ignore[misc] + @property + def tokens_per_second(self) -> float | None: + """ + Overall token throughput including prompt and output tokens. + + :return: Total tokens per second, or None if unavailable. + """ + if not (latency := self.request_latency) or not (tokens := self.total_tokens): + return None + + return tokens / latency + + @computed_field # type: ignore[misc] + @property + def output_tokens_per_second(self) -> float | None: + """ + Output token generation throughput. + + :return: Output tokens per second, or None if unavailable. 
+ """ + if not (latency := self.request_latency) or not self.output_tokens: + return None + + return self.output_tokens / latency + + +class GenerativeMetrics(BenchmarkMetrics): + """Comprehensive metrics for generative AI benchmarks.""" + + prompt_token_count: StatusDistributionSummary = Field( + description="Distribution of prompt token counts by request status" + ) + output_token_count: StatusDistributionSummary = Field( + description="Distribution of output token counts by request status" + ) + total_token_count: StatusDistributionSummary = Field( + description="Distribution of total token counts by request status" + ) + time_to_first_token_ms: StatusDistributionSummary = Field( + description="Distribution of first token latencies in milliseconds" + ) + time_per_output_token_ms: StatusDistributionSummary = Field( + description="Distribution of average time per output token in milliseconds" + ) + inter_token_latency_ms: StatusDistributionSummary = Field( + description="Distribution of inter-token latencies in milliseconds" + ) + output_tokens_per_second: StatusDistributionSummary = Field( + description="Distribution of output token generation rates" + ) + tokens_per_second: StatusDistributionSummary = Field( + description="Distribution of total token throughput including prompt and output" + ) + + +class GenerativeBenchmark(Benchmark[GenerativeMetrics, GenerativeRequestStats]): + """Complete generative AI benchmark results with specialized metrics.""" + + type_: Literal["generative_benchmark"] = "generative_benchmark" # type: ignore[assignment] + + +class GenerativeBenchmarksReport(StandardBaseModel): + """Container for multiple benchmark results with load/save functionality.""" + + DEFAULT_FILE: ClassVar[str] = "benchmarks.json" + + @staticmethod + def load_file( + path: str | Path, type_: Literal["json", "yaml"] | None = None + ) -> GenerativeBenchmarksReport: + """ + Load a report from a file. + + :param path: The path to load the report from. + :param type_: File type override, auto-detected from extension if None. + :return: The loaded report. + :raises ValueError: If file type is unsupported. + """ + path = Path(path) if not isinstance(path, Path) else path + + if path.is_dir(): + path = path / GenerativeBenchmarksReport.DEFAULT_FILE + + path.parent.mkdir(parents=True, exist_ok=True) + path_suffix = path.suffix.lower()[1:] + + with path.open("r") as file: + if (type_ or path_suffix) == "json": + model_dict = json.loads(file.read()) + elif (type_ or path_suffix) in ["yaml", "yml"]: + model_dict = yaml.safe_load(file) + else: + raise ValueError(f"Unsupported file type: {type_} for {path}.") + + return GenerativeBenchmarksReport.model_validate(model_dict) + + benchmarks: list[GenerativeBenchmark] = Field( + description="The list of completed benchmarks contained within the report.", + default_factory=list, + ) + + def save_file( + self, path: str | Path | None, type_: Literal["json", "yaml"] | None = None + ) -> Path: + """ + Save the report to a file. + + :param path: The path to save the report to. + :param type_: File type override, auto-detected from extension if None. + :return: The path to the saved report. + :raises ValueError: If file type is unsupported. 
+ """ + if path is None: + path = Path.cwd() + elif not isinstance(path, Path): + path = Path(path) + + if path.is_dir(): + path = path / GenerativeBenchmarksReport.DEFAULT_FILE + + path.parent.mkdir(parents=True, exist_ok=True) + path_suffix = path.suffix.lower()[1:] + model_dict = self.model_dump() + + if (type_ or path_suffix) == "json": + save_str = json.dumps(model_dict) + elif (type_ or path_suffix) in ["yaml", "yml"]: + save_str = yaml.dump(model_dict) + else: + raise ValueError(f"Unsupported file type: {type_} for {path}.") + + with path.open("w") as file: + file.write(save_str) + + return path diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 6759f16f..95b51d70 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -1,429 +1,318 @@ +from __future__ import annotations + import csv import json import math +from abc import ABC, abstractmethod from collections import OrderedDict -from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Any, Literal, Optional, Union +from typing import Any, ClassVar -import yaml -from pydantic import Field +from pydantic import BaseModel, ConfigDict, Field from rich.console import Console from rich.padding import Padding from rich.text import Text -from guidellm.benchmark.benchmark import GenerativeBenchmark, GenerativeMetrics +from guidellm.benchmark.objects import ( + GenerativeBenchmark, + GenerativeBenchmarksReport, + GenerativeMetrics, +) from guidellm.benchmark.profile import ( AsyncProfile, ConcurrentProfile, SweepProfile, ThroughputProfile, ) -from guidellm.objects import ( - DistributionSummary, - StandardBaseModel, - StatusDistributionSummary, -) from guidellm.presentation import UIDataBuilder from guidellm.presentation.injector import create_report -from guidellm.scheduler import strategy_display_str from guidellm.settings import settings -from guidellm.utils import Colors, split_text_list_by_length -from guidellm.utils.dict import recursive_key_update -from guidellm.utils.text import camelize_str +from guidellm.utils import ( + Colors, + DistributionSummary, + RegistryMixin, + StatusDistributionSummary, + safe_format_timestamp, + split_text_list_by_length, +) __all__ = [ - "GenerativeBenchmarksConsole", - "GenerativeBenchmarksReport", + "GenerativeBenchmarkerCSV", + "GenerativeBenchmarkerConsole", + "GenerativeBenchmarkerHTML", + "GenerativeBenchmarkerOutput", ] -class GenerativeBenchmarksReport(StandardBaseModel): - """ - A pydantic model representing a completed benchmark report. - Contains a list of benchmarks along with convenience methods for finalizing - and saving the report. - """ - - @staticmethod - def load_file(path: Union[str, Path]) -> "GenerativeBenchmarksReport": - """ - Load a report from a file. The file type is determined by the file extension. - If the file is a directory, it expects a file named benchmarks.json under the - directory. - - :param path: The path to load the report from. - :return: The loaded report. 
- """ - path, type_ = GenerativeBenchmarksReport._file_setup(path) - - if type_ == "json": - with path.open("r") as file: - model_dict = json.load(file) - - return GenerativeBenchmarksReport.model_validate(model_dict) - - if type_ == "yaml": - with path.open("r") as file: - model_dict = yaml.safe_load(file) - - return GenerativeBenchmarksReport.model_validate(model_dict) - - if type_ == "csv": - raise ValueError(f"CSV file type is not supported for loading: {path}.") - - if type_ == "html": - raise ValueError(f"HTML file type is not supported for loading: {path}.") - - raise ValueError(f"Unsupported file type: {type_} for {path}.") - - benchmarks: list[GenerativeBenchmark] = Field( - description="The list of completed benchmarks contained within the report.", - default_factory=list, +class GenerativeBenchmarkerOutput( + BaseModel, RegistryMixin[type["GenerativeBenchmarkerOutput"]], ABC +): + model_config = ConfigDict( + extra="ignore", + arbitrary_types_allowed=True, + validate_assignment=True, + from_attributes=True, + use_enum_values=True, ) - def set_sample_size( - self, sample_size: Optional[int] - ) -> "GenerativeBenchmarksReport": + @classmethod + @abstractmethod + def validated_kwargs(cls, *args, **kwargs) -> dict[str, Any]: """ - Set the sample size for each benchmark in the report. In doing this, it will - reduce the contained requests of each benchmark to the sample size. - If sample size is None, it will return the report as is. + Validate and process arguments for constraint creation. - :param sample_size: The sample size to set for each benchmark. - If None, the report will be returned as is. - :return: The report with the sample size set for each benchmark. - """ - - if sample_size is not None: - for benchmark in self.benchmarks: - benchmark.set_sample_size(sample_size) + Must be implemented by subclasses to handle their specific parameter patterns. - return self - - def save_file(self, path: Union[str, Path]) -> Path: + :param args: Positional arguments passed to the constraint + :param kwargs: Keyword arguments passed to the constraint + :return: Validated dictionary of parameters for constraint creation + :raises NotImplementedError: Must be implemented by subclasses """ - Save the report to a file. The file type is determined by the file extension. - If the file is a directory, it will save the report to a file named - benchmarks.json under the directory. + ... - :param path: The path to save the report to. - :return: The path to the saved report. - """ - path, type_ = GenerativeBenchmarksReport._file_setup(path) - - if type_ == "json": - return self.save_json(path) - - if type_ == "yaml": - return self.save_yaml(path) - - if type_ == "csv": - return self.save_csv(path) - - if type_ == "html": - return self.save_html(path) - - raise ValueError(f"Unsupported file type: {type_} for {path}.") - - def save_json(self, path: Union[str, Path]) -> Path: - """ - Save the report to a JSON file containing all of the report data which is - reloadable using the pydantic model. If the file is a directory, it will save - the report to a file named benchmarks.json under the directory. - - :param path: The path to save the report to. - :return: The path to the saved report. - """ - path, type_ = GenerativeBenchmarksReport._file_setup(path, "json") - - if type_ != "json": - raise ValueError( - f"Unsupported file type for saving a JSON: {type_} for {path}." 
- ) - - model_dict = self.model_dump() - model_json = json.dumps(model_dict) - - with path.open("w") as file: - file.write(model_json) - - return path - - def save_yaml(self, path: Union[str, Path]) -> Path: - """ - Save the report to a YAML file containing all of the report data which is - reloadable using the pydantic model. If the file is a directory, it will save - the report to a file named benchmarks.yaml under the directory. - - :param path: The path to save the report to. - :return: The path to the saved report. - """ - - path, type_ = GenerativeBenchmarksReport._file_setup(path, "yaml") - - if type_ != "yaml": - raise ValueError( - f"Unsupported file type for saving a YAML: {type_} for {path}." - ) - - model_dict = self.model_dump() - model_yaml = yaml.dump(model_dict) - - with path.open("w") as file: - file.write(model_yaml) - - return path - - def save_csv(self, path: Union[str, Path]) -> Path: - """ - Save the report to a CSV file containing the summarized statistics and values - for each report. Note, this data is not reloadable using the pydantic model. - If the file is a directory, it will save the report to a file named - benchmarks.csv under the directory. - - :param path: The path to save the report to. - :return: The path to the saved report. - """ - path, type_ = GenerativeBenchmarksReport._file_setup(path, "csv") - - if type_ != "csv": - raise ValueError( - f"Unsupported file type for saving a CSV: {type_} for {path}." + @classmethod + def resolve( + cls, + output_formats: ( + tuple[str, ...] + | list[str] + | dict[ + str, + Any | dict[str, Any] | GenerativeBenchmarkerOutput, + ] + | None + ), + output_path: str | Path | None, + ) -> dict[str, GenerativeBenchmarkerOutput]: + if not output_formats: + return {} + + if isinstance(output_formats, (list, tuple)): + # support list of output keys: ["csv", "json"] + # support list of files: ["path/to/file.json", "path/to/file.csv"] + formats_list = output_formats + output_formats = {} + for output_format in formats_list: + if not isinstance(output_format, str): + raise TypeError( + f"Expected string format, got {type(output_format)} for " + f"{output_format} in {formats_list}" + ) + try: + if cls.is_registered(output_format): + output_formats[output_format] = {} + else: + # treat it as a file save location + path = Path(output_format) + format_type = path.suffix[1:].lower() + output_formats[format_type] = {"output_path": path} + + except Exception as err: + raise ValueError( + f"Failed to resolve output format '{output_format}': {err}" + ) from err + + resolved = {} + + for key, val in output_formats.items(): + if isinstance(val, GenerativeBenchmarkerOutput): + resolved[key] = val + else: + output_class = cls.get_registered_object(key) + kwargs = {"output_path": output_path} + + if isinstance(val, dict): + kwargs.update(val) + kwargs = output_class.validated_kwargs(**kwargs) + else: + kwargs = output_class.validated_kwargs(val, **kwargs) + + resolved[key] = output_class(**kwargs) + + return resolved + + @abstractmethod + async def finalize(self, report: GenerativeBenchmarksReport) -> Any: ... 
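The registry contract above (register, validated_kwargs, finalize) is what GenerativeBenchmarkerOutput.resolve uses to turn format keys and file paths into output instances, so additional formats can be plugged in without touching the resolution logic. The sketch below is illustrative only and not part of this patch: the "txt" key, the GenerativeBenchmarkerTXT name, the benchmarks.txt default file, and the summary line format are invented for the example, while the method signatures mirror the CSV and serialized outputs defined in this module.

```python
from __future__ import annotations

from pathlib import Path
from typing import Any

from pydantic import Field

from guidellm.benchmark.objects import GenerativeBenchmarksReport
from guidellm.benchmark.output import GenerativeBenchmarkerOutput


@GenerativeBenchmarkerOutput.register("txt")  # hypothetical format key
class GenerativeBenchmarkerTXT(GenerativeBenchmarkerOutput):
    """Illustrative plain-text summary output, one line per benchmark."""

    output_path: Path = Field(default_factory=lambda: Path.cwd())

    @classmethod
    def validated_kwargs(
        cls, output_path: str | Path | None = None, **_kwargs
    ) -> dict[str, Any]:
        # Mirror the built-in outputs: only forward output_path when provided.
        return {"output_path": Path(output_path)} if output_path is not None else {}

    async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
        # Resolve a directory to a default file name, then write one summary
        # line per benchmark using fields defined on GenerativeBenchmark.
        path = self.output_path
        if path.is_dir():
            path = path / "benchmarks.txt"
        path.parent.mkdir(parents=True, exist_ok=True)
        lines = [
            f"{benchmark.id_}: {benchmark.request_totals.successful} successful, "
            f"{benchmark.request_totals.errored} errored requests"
            for benchmark in report.benchmarks
        ]
        path.write_text("\n".join(lines))
        return path
```

With such a class registered, passing output_formats=("console", "txt") to benchmark_generative_text would be resolved to an instance of it by GenerativeBenchmarkerOutput.resolve, following the same path the built-in console, csv, and html formats take.
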
+ + +@GenerativeBenchmarkerOutput.register(["json", "yaml"]) +class GenerativeBenchmarkerSerialized(GenerativeBenchmarkerOutput): + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + new_kwargs = {} + if output_path is not None: + new_kwargs["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path ) + return new_kwargs - with path.open("w", newline="") as file: - writer = csv.writer(file) - headers: list[str] = [] - rows: list[list[Union[str, float, list[float]]]] = [] - - for benchmark in self.benchmarks: - benchmark_headers: list[str] = [] - benchmark_values: list[Union[str, float, list[float]]] = [] - - desc_headers, desc_values = self._benchmark_desc_headers_and_values( - benchmark - ) - benchmark_headers += desc_headers - benchmark_values += desc_values + output_path: Path = Field(default_factory=lambda: Path.cwd()) - for status in StatusDistributionSummary.model_fields: - status_headers, status_values = ( - self._benchmark_status_headers_and_values(benchmark, status) - ) - benchmark_headers += status_headers - benchmark_values += status_values + async def finalize(self, report: GenerativeBenchmarksReport) -> Path: + return report.save_file(self.output_path) - benchmark_extra_headers, benchmark_extra_values = ( - self._benchmark_extras_headers_and_values(benchmark) - ) - benchmark_headers += benchmark_extra_headers - benchmark_values += benchmark_extra_values - if not headers: - headers = benchmark_headers - rows.append(benchmark_values) +@GenerativeBenchmarkerOutput.register("console") +class GenerativeBenchmarkerConsole(GenerativeBenchmarkerOutput): + """Console output formatter for benchmark results with rich formatting.""" - writer.writerow(headers) - for row in rows: - writer.writerow(row) + @classmethod + def validated_kwargs(cls, *_args, **_kwargs) -> dict[str, Any]: + return {} - return path + console: Console = Field(default_factory=Console) - def save_html(self, path: Union[str, Path]) -> Path: + async def finalize(self, report: GenerativeBenchmarksReport) -> str: """ - Download html, inject report data and save to a file. + Print the complete benchmark report to the console. - :param path: The path to create the report at. - :return: The path to the report. + :param report: The completed benchmark report. 
+ :return: """ + self._print_benchmarks_metadata(report.benchmarks) + self._print_benchmarks_info(report.benchmarks) + self._print_benchmarks_stats(report.benchmarks) - data_builder = UIDataBuilder(self.benchmarks) - data = data_builder.to_dict() - camel_data = recursive_key_update(deepcopy(data), camelize_str) - ui_api_data = {} - for k, v in camel_data.items(): - key = f"window.{k} = {{}};" - value = f"window.{k} = {json.dumps(v, indent=2)};\n" - ui_api_data[key] = value - return create_report(ui_api_data, path) - - @staticmethod - def _file_setup( - path: Union[str, Path], - default_file_type: Literal["json", "yaml", "csv", "html"] = "json", - ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]: - path = Path(path) if not isinstance(path, Path) else path + return "printed to console" - if path.is_dir(): - path = path / f"benchmarks.{default_file_type}" - - path.parent.mkdir(parents=True, exist_ok=True) - path_suffix = path.suffix.lower() - - if path_suffix == ".json": - return path, "json" - - if path_suffix in [".yaml", ".yml"]: - return path, "yaml" - - if path_suffix in [".csv"]: - return path, "csv" - - if path_suffix in [".html"]: - return path, "html" + def _print_benchmarks_metadata(self, benchmarks: list[GenerativeBenchmark]): + start_time = benchmarks[0].run_stats.start_time + end_time = benchmarks[-1].run_stats.end_time + duration = end_time - start_time - raise ValueError( - f"Unsupported file extension: {path_suffix} for {path}; " - "expected json, yaml, csv, or html." - ) + self._print_section_header("Benchmarks Metadata") + self._print_labeled_line("Run id", str(benchmarks[0].run_id)) + self._print_labeled_line("Duration", f"{duration:.1f} seconds") + self._print_labeled_line("Profile", self._get_profile_str(benchmarks[0])) - @staticmethod - def _benchmark_desc_headers_and_values( - benchmark: GenerativeBenchmark, - ) -> tuple[list[str], list[Union[str, float]]]: + def _print_benchmarks_info(self, benchmarks: list[GenerativeBenchmark]): + sections = { + "Metadata": (0, 3), + "Requests Made": (4, 6), + "Prompt Tok/Req": (7, 9), + "Output Tok/Req": (10, 12), + "Prompt Tok Total": (13, 15), + "Output Tok Total": (16, 18), + } headers = [ - "Type", - "Run Id", - "Id", - "Name", + "Benchmark", "Start Time", "End Time", - "Duration", - ] - values: list[Union[str, float]] = [ - benchmark.type_, - benchmark.run_id, - benchmark.id_, - strategy_display_str(benchmark.args.strategy), - datetime.fromtimestamp(benchmark.start_time).strftime("%Y-%m-%d %H:%M:%S"), - datetime.fromtimestamp(benchmark.end_time).strftime("%Y-%m-%d %H:%M:%S"), - benchmark.duration, - ] - - if len(headers) != len(values): - raise ValueError("Headers and values length mismatch.") - - return headers, values - - @staticmethod - def _benchmark_extras_headers_and_values( - benchmark: GenerativeBenchmark, - ) -> tuple[list[str], list[str]]: - headers = ["Args", "Worker", "Request Loader", "Extras"] - values: list[str] = [ - json.dumps(benchmark.args.model_dump()), - json.dumps(benchmark.worker.model_dump()), - json.dumps(benchmark.request_loader.model_dump()), - json.dumps(benchmark.extras), - ] - - if len(headers) != len(values): - raise ValueError("Headers and values length mismatch.") - - return headers, values - - @staticmethod - def _benchmark_status_headers_and_values( - benchmark: GenerativeBenchmark, status: str - ) -> tuple[list[str], list[Union[float, list[float]]]]: - headers = [ - f"{status.capitalize()} Requests", - ] - values = [ - getattr(benchmark.request_totals, status), + "Duration (s)", + 
"Comp", + "Inc", + "Err", + "Comp", + "Inc", + "Err", + "Comp", + "Inc", + "Err", + "Comp", + "Inc", + "Err", + "Comp", + "Inc", + "Err", ] - for metric in GenerativeMetrics.model_fields: - metric_headers, metric_values = ( - GenerativeBenchmarksReport._benchmark_status_metrics_stats( - benchmark, status, metric - ) + rows = [] + for benchmark in benchmarks: + rows.append( + [ + str(benchmark.scheduler.strategy), + safe_format_timestamp(benchmark.start_time), + safe_format_timestamp(benchmark.end_time), + f"{(benchmark.end_time - benchmark.start_time):.1f}", + f"{benchmark.request_totals.successful:.0f}", + f"{benchmark.request_totals.incomplete:.0f}", + f"{benchmark.request_totals.errored:.0f}", + f"{benchmark.metrics.prompt_token_count.successful.mean:.1f}", + f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f}", + f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}", + f"{benchmark.metrics.output_token_count.successful.mean:.1f}", + f"{benchmark.metrics.output_token_count.incomplete.mean:.1f}", + f"{benchmark.metrics.output_token_count.errored.mean:.1f}", + f"{benchmark.metrics.prompt_token_count.successful.total_sum:.0f}", + f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f}", + f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}", + f"{benchmark.metrics.output_token_count.successful.total_sum:.0f}", + f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f}", + f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}", + ] ) - headers += metric_headers - values += metric_values - if len(headers) != len(values): - raise ValueError("Headers and values length mismatch.") - - return headers, values + self._print_table(headers, rows, "Benchmarks Info", sections) - @staticmethod - def _benchmark_status_metrics_stats( - benchmark: GenerativeBenchmark, - status: str, - metric: str, - ) -> tuple[list[str], list[Union[float, list[float]]]]: - status_display = status.capitalize() - metric_display = metric.replace("_", " ").capitalize() - status_dist_summary: StatusDistributionSummary = getattr( - benchmark.metrics, metric - ) - dist_summary: DistributionSummary = getattr(status_dist_summary, status) + def _print_benchmarks_stats(self, benchmarks: list[GenerativeBenchmark]): + sections = { + "Metadata": (0, 0), + "Request Stats": (1, 2), + "Out Tok/sec": (3, 3), + "Tot Tok/sec": (4, 4), + "Req Latency (sec)": (5, 7), + "TTFT (ms)": (8, 10), + "ITL (ms)": (11, 13), + "TPOT (ms)": (14, 16), + } headers = [ - f"{status_display} {metric_display} mean", - f"{status_display} {metric_display} median", - f"{status_display} {metric_display} std dev", - ( - f"{status_display} {metric_display} " - "[min, 0.1, 1, 5, 10, 25, 75, 90, 95, 99, max]" - ), - ] - values: list[Union[float, list[float]]] = [ - dist_summary.mean, - dist_summary.median, - dist_summary.std_dev, - [ - dist_summary.min, - dist_summary.percentiles.p001, - dist_summary.percentiles.p01, - dist_summary.percentiles.p05, - dist_summary.percentiles.p10, - dist_summary.percentiles.p25, - dist_summary.percentiles.p75, - dist_summary.percentiles.p90, - dist_summary.percentiles.p95, - dist_summary.percentiles.p99, - dist_summary.max, - ], + "Benchmark", + "Per Second", + "Concurrency", + "mean", + "mean", + "mean", + "median", + "p99", + "mean", + "median", + "p99", + "mean", + "median", + "p99", + "mean", + "median", + "p99", ] - if len(headers) != len(values): - raise ValueError("Headers and values length mismatch.") - - return headers, values - - -class GenerativeBenchmarksConsole: - """ - A 
class for outputting progress and benchmark results to the console. - Utilizes the rich library for formatting, enabling colored and styled output. - """ - - def __init__(self, enabled: bool = True): - """ - :param enabled: Whether to enable console output. Defaults to True. - If False, all console output will be suppressed. - """ - self.enabled = enabled - self.benchmarks: Optional[list[GenerativeBenchmark]] = None - self.console = Console() + rows = [] + for benchmark in benchmarks: + rows.append( + [ + str(benchmark.scheduler.strategy), + f"{benchmark.metrics.requests_per_second.successful.mean:.2f}", + f"{benchmark.metrics.request_concurrency.successful.mean:.2f}", + f"{benchmark.metrics.output_tokens_per_second.successful.mean:.1f}", + f"{benchmark.metrics.tokens_per_second.successful.mean:.1f}", + f"{benchmark.metrics.request_latency.successful.mean:.2f}", + f"{benchmark.metrics.request_latency.successful.median:.2f}", + f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}", + f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f}", + f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f}", + f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}", + f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f}", + f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f}", + f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}", + f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f}", + f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f}", + f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}", + ] + ) - @property - def benchmarks_profile_str(self) -> str: - """ - :return: A string representation of the profile used for the benchmarks. - """ - profile = self.benchmarks[0].args.profile if self.benchmarks else None + self._print_table(headers, rows, "Benchmarks Stats", sections) + def _get_profile_str(self, benchmark: GenerativeBenchmark) -> str: + profile = benchmark.benchmarker.profile if profile is None: return "None" profile_args = OrderedDict( { "type": profile.type_, - "strategies": profile.strategy_types, + "strategies": getattr(profile, "strategy_types", []), } ) @@ -434,22 +323,13 @@ def benchmarks_profile_str(self) -> str: elif isinstance(profile, AsyncProfile): profile_args["max_concurrency"] = str(profile.max_concurrency) profile_args["rate"] = str(profile.rate) - profile_args["initial_burst"] = str(profile.initial_burst) elif isinstance(profile, SweepProfile): profile_args["sweep_size"] = str(profile.sweep_size) return ", ".join(f"{key}={value}" for key, value in profile_args.items()) - @property - def benchmarks_args_str(self) -> str: - """ - :return: A string representation of the arguments used for the benchmarks. - """ - args = self.benchmarks[0].args if self.benchmarks else None - - if args is None: - return "None" - + def _get_args_str(self, benchmark: GenerativeBenchmark) -> str: + args = benchmark.args args_dict = OrderedDict( { "max_number": args.max_number, @@ -460,111 +340,45 @@ def benchmarks_args_str(self) -> str: "cooldown_duration": args.cooldown_duration, } ) - return ", ".join(f"{key}={value}" for key, value in args_dict.items()) - @property - def benchmarks_worker_desc_str(self) -> str: - """ - :return: A string representation of the worker used for the benchmarks. 
- """ - return str(self.benchmarks[0].worker) if self.benchmarks else "None" - - @property - def benchmarks_request_loader_desc_str(self) -> str: - """ - :return: A string representation of the request loader used for the benchmarks. - """ - return str(self.benchmarks[0].request_loader) if self.benchmarks else "None" - - @property - def benchmarks_extras_str(self) -> str: - """ - :return: A string representation of the extras used for the benchmarks. - """ - extras = self.benchmarks[0].extras if self.benchmarks else None - - if not extras: - return "None" - - return ", ".join(f"{key}={value}" for key, value in extras.items()) - - def print_section_header(self, title: str, indent: int = 0, new_lines: int = 2): - """ - Print out a styled section header to the console. - The title is underlined, bolded, and colored with the INFO color. - - :param title: The title of the section. - :param indent: The number of spaces to indent the title. - Defaults to 0. - :param new_lines: The number of new lines to print before the title. - Defaults to 2. - """ - self.print_line( - value=f"{title}:", - style=f"bold underline {Colors.INFO}", + def _print_section_header(self, title: str, indent: int = 0, new_lines: int = 2): + self._print_line( + f"{title}:", + f"bold underline {Colors.info}", indent=indent, new_lines=new_lines, ) - def print_labeled_line( + def _print_labeled_line( self, label: str, value: str, indent: int = 4, new_lines: int = 0 ): - """ - Print out a styled, labeled line (label: value) to the console. - The label is bolded and colored with the INFO color, - and the value is italicized. - - :param label: The label of the line. - :param value: The value of the line. - :param indent: The number of spaces to indent the line. - Defaults to 4. - :param new_lines: The number of new lines to print before the line. - Defaults to 0. - """ - self.print_line( - value=[label + ":", value], - style=["bold " + Colors.INFO, "italic"], + self._print_line( + [label + ":", value], + ["bold " + Colors.info, "italic"], new_lines=new_lines, indent=indent, ) - def print_line( + def _print_line( self, - value: Union[str, list[str]], - style: Union[str, list[str]] = "", + value: str | list[str], + style: str | list[str] = "", indent: int = 0, new_lines: int = 0, ): - """ - Print out a a value to the console as a line with optional indentation. - - :param value: The value to print. - :param style: The style to apply to the value. - Defaults to none. - :param indent: The number of spaces to indent the line. - Defaults to 0. - :param new_lines: The number of new lines to print before the value. - Defaults to 0. - """ - if not self.enabled: - return - text = Text() - for _ in range(new_lines): text.append("\n") if not isinstance(value, list): value = [value] - if not isinstance(style, list): style = [style for _ in range(len(value))] if len(value) != len(style): raise ValueError( - f"Value and style length mismatch. Value length: {len(value)}, " - f"Style length: {len(style)}." 
+ f"Value and style length mismatch: {len(value)} vs {len(style)}" ) for val, sty in zip(value, style): @@ -572,128 +386,81 @@ def print_line( self.console.print(Padding.indent(text, indent)) - def print_table( + def _print_table( self, headers: list[str], rows: list[list[Any]], title: str, - sections: Optional[dict[str, tuple[int, int]]] = None, - max_char_per_col: int = 2**10, + sections: dict[str, tuple[int, int]] | None = None, + max_char_per_col: int = 1024, indent: int = 0, new_lines: int = 2, ): - """ - Print a table to the console with the given headers and rows. - - :param headers: The headers of the table. - :param rows: The rows of the table. - :param title: The title of the table. - :param sections: The sections of the table grouping columns together. - This is a mapping of the section display name to a tuple of the start and - end column indices. If None, no sections are added (default). - :param max_char_per_col: The maximum number of characters per column. - :param indent: The number of spaces to indent the table. - Defaults to 0. - :param new_lines: The number of new lines to print before the table. - Defaults to 0. - """ - if rows and any(len(row) != len(headers) for row in rows): raise ValueError( - f"Headers and rows length mismatch. Headers length: {len(headers)}, " - f"Row length: {len(rows[0]) if rows else 'N/A'}." + "Headers and rows length mismatch: " + f"{len(headers)} vs {len(rows[0]) if rows else 'N/A'}" ) - max_characters_per_column = self.calculate_max_chars_per_column( + max_chars_per_column = self._calculate_max_chars_per_column( headers, rows, sections, max_char_per_col ) - self.print_section_header(title, indent=indent, new_lines=new_lines) - self.print_table_divider( - max_characters_per_column, include_separators=False, indent=indent - ) + self._print_section_header(title, indent=indent, new_lines=new_lines) + self._print_table_divider(max_chars_per_column, False, indent) if sections: - self.print_table_sections( - sections, max_characters_per_column, indent=indent - ) - self.print_table_row( - split_text_list_by_length(headers, max_characters_per_column), - style=f"bold {Colors.INFO}", - indent=indent, - ) - self.print_table_divider( - max_characters_per_column, include_separators=True, indent=indent + self._print_table_sections(sections, max_chars_per_column, indent) + self._print_table_row( + split_text_list_by_length(headers, max_chars_per_column), + f"bold {Colors.info}", + indent, ) + self._print_table_divider(max_chars_per_column, True, indent) for row in rows: - self.print_table_row( - split_text_list_by_length(row, max_characters_per_column), - style="italic", - indent=indent, + self._print_table_row( + split_text_list_by_length(row, max_chars_per_column), + "italic", + indent, ) - self.print_table_divider( - max_characters_per_column, include_separators=False, indent=indent - ) + self._print_table_divider(max_chars_per_column, False, indent) - def calculate_max_chars_per_column( + def _calculate_max_chars_per_column( self, headers: list[str], rows: list[list[Any]], - sections: Optional[dict[str, tuple[int, int]]], + sections: dict[str, tuple[int, int]] | None, max_char_per_col: int, ) -> list[int]: - """ - Calculate the maximum number of characters per column in the table. - This is done by checking the length of the headers, rows, and optional sections - to ensure all columns are accounted for and spaced correctly. - - :param headers: The headers of the table. - :param rows: The rows of the table. 
- :param sections: The sections of the table grouping columns together. - This is a mapping of the section display name to a tuple of the start and - end column indices. If None, no sections are added (default). - :param max_char_per_col: The maximum number of characters per column. - :return: A list of the maximum number of characters per column. - """ - max_characters_per_column = [] + """Calculate maximum characters per column for table formatting.""" + max_chars_per_column = [] for ind in range(len(headers)): - max_characters_per_column.append(min(len(headers[ind]), max_char_per_col)) - + max_chars_per_column.append(min(len(headers[ind]), max_char_per_col)) for row in rows: - max_characters_per_column[ind] = max( - max_characters_per_column[ind], len(str(row[ind])) + max_chars_per_column[ind] = max( + max_chars_per_column[ind], len(str(row[ind])) ) if not sections: - return max_characters_per_column + return max_chars_per_column - for section in sections: - start_col, end_col = sections[section] - min_section_len = len(section) + ( - end_col - start_col - ) # ensure we have enough space for separators + for section, (start_col, end_col) in sections.items(): + min_section_len = len(section) + (end_col - start_col) chars_in_columns = sum( - max_characters_per_column[start_col : end_col + 1] + max_chars_per_column[start_col : end_col + 1] ) + 2 * (end_col - start_col) if min_section_len > chars_in_columns: add_chars_per_col = math.ceil( (min_section_len - chars_in_columns) / (end_col - start_col + 1) ) for col in range(start_col, end_col + 1): - max_characters_per_column[col] += add_chars_per_col + max_chars_per_column[col] += add_chars_per_col - return max_characters_per_column + return max_chars_per_column - def print_table_divider( + def _print_table_divider( self, max_chars_per_column: list[int], include_separators: bool, indent: int = 0 ): - """ - Print a divider line for the table (top and bottom of table with '=' characters) - - :param max_chars_per_column: The maximum number of characters per column. - :param include_separators: Whether to include separators between columns. - :param indent: The number of spaces to indent the line. - Defaults to 0. - """ + """Print table divider line.""" if include_separators: columns = [ settings.table_headers_border_char * max_chars @@ -706,29 +473,15 @@ def print_table_divider( settings.table_border_char * (max_chars + 2) for max_chars in max_chars_per_column ] - columns[-1] = columns[-1][:-2] - self.print_line(value=columns, style=Colors.INFO, indent=indent) + self._print_line(columns, Colors.info, indent) - def print_table_sections( + def _print_table_sections( self, sections: dict[str, tuple[int, int]], max_chars_per_column: list[int], indent: int = 0, ): - """ - Print the sections of the table with corresponding separators to the columns - the sections are mapped to to ensure it is compliant with a CSV format. - For example, a section named "Metadata" with columns 0-3 will print this: - Metadata ,,,, - Where the spaces plus the separators at the end will span the columns 0-3. - All columns must be accounted for in the sections. - - :param sections: The sections of the table. - :param max_chars_per_column: The maximum number of characters per column. - :param indent: The number of spaces to indent the line. - Defaults to 0. 
- """ section_tuples = [(start, end, name) for name, (start, end) in sections.items()] section_tuples.sort(key=lambda x: x[0]) @@ -752,30 +505,23 @@ def print_table_sections( end_col - start_col + 1 ) num_separators = end_col - start_col - line_values.append(section) - line_styles.append("bold " + Colors.INFO) - line_values.append( - " " * (section_length - len(section) - num_separators - 2) + line_values.extend( + [ + section, + " " * (section_length - len(section) - num_separators - 2), + settings.table_column_separator_char * num_separators, + settings.table_column_separator_char + " ", + ] ) - line_styles.append("") - line_values.append(settings.table_column_separator_char * num_separators) - line_styles.append("") - line_values.append(settings.table_column_separator_char + " ") - line_styles.append(Colors.INFO) + line_styles.extend(["bold " + Colors.info, "", "", Colors.info]) + line_values = line_values[:-1] line_styles = line_styles[:-1] - self.print_line(value=line_values, style=line_styles, indent=indent) + self._print_line(line_values, line_styles, indent) - def print_table_row( + def _print_table_row( self, column_lines: list[list[str]], style: str, indent: int = 0 ): - """ - Print a single row of a table to the console. - - :param column_lines: The lines of text to print for each column. - :param indent: The number of spaces to indent the line. - Defaults to 0. - """ for row in range(len(column_lines[0])): print_line = [] print_styles = [] @@ -787,212 +533,203 @@ def print_table_row( " ", ] ) - print_styles.extend([style, Colors.INFO, ""]) + print_styles.extend([style, Colors.info, ""]) print_line = print_line[:-2] print_styles = print_styles[:-2] - self.print_line(value=print_line, style=print_styles, indent=indent) + self._print_line(print_line, print_styles, indent) - def print_benchmarks_metadata(self): - """ - Print out the metadata of the benchmarks to the console including the run id, - duration, profile, args, worker, request loader, and extras. - """ - if not self.benchmarks: - raise ValueError( - "No benchmarks to print metadata for. Please set benchmarks first." 
- ) +@GenerativeBenchmarkerOutput.register("csv") +class GenerativeBenchmarkerCSV(GenerativeBenchmarkerOutput): + """CSV output formatter for benchmark results.""" - start_time = self.benchmarks[0].run_stats.start_time - end_time = self.benchmarks[-1].run_stats.end_time - duration = end_time - start_time + DEFAULT_FILE: ClassVar[str] = "benchmarks.csv" - self.print_section_header(title="Benchmarks Metadata") - self.print_labeled_line( - label="Run id", - value=str(self.benchmarks[0].run_id), - ) - self.print_labeled_line( - label="Duration", - value=f"{duration:.1f} seconds", - ) - self.print_labeled_line( - label="Profile", - value=self.benchmarks_profile_str, - ) - self.print_labeled_line( - label="Args", - value=self.benchmarks_args_str, - ) - self.print_labeled_line( - label="Worker", - value=self.benchmarks_worker_desc_str, - ) - self.print_labeled_line( - label="Request Loader", - value=self.benchmarks_request_loader_desc_str, - ) - self.print_labeled_line( - label="Extras", - value=self.benchmarks_extras_str, - ) + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + new_kwargs = {} + if output_path is not None: + new_kwargs["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path + ) + return new_kwargs + + output_path: Path = Field(default_factory=lambda: Path.cwd()) - def print_benchmarks_info(self): + async def finalize(self, report: GenerativeBenchmarksReport) -> Path: """ - Print out the benchmark information to the console including the start time, - end time, duration, request totals, and token totals for each benchmark. + Save the benchmark report as a CSV file. + + :param report: The completed benchmark report. + :return: Path to the saved CSV file. """ - if not self.benchmarks: - raise ValueError( - "No benchmarks to print info for. Please set benchmarks first." 
- ) + output_path = self.output_path + if output_path.is_dir(): + output_path = output_path / GenerativeBenchmarkerCSV.DEFAULT_FILE + output_path.parent.mkdir(parents=True, exist_ok=True) - sections = { - "Metadata": (0, 3), - "Requests Made": (4, 6), - "Prompt Tok/Req": (7, 9), - "Output Tok/Req": (10, 12), - "Prompt Tok Total": (13, 15), - "Output Tok Total": (16, 18), - } + with output_path.open("w", newline="") as file: + writer = csv.writer(file) + headers: list[str] = [] + rows: list[list[str | float | list[float]]] = [] + + for benchmark in report.benchmarks: + benchmark_headers: list[str] = [] + benchmark_values: list[str | float | list[float]] = [] + + # Add status-based metrics + for status in StatusDistributionSummary.model_fields: + status_headers, status_values = ( + self._get_benchmark_status_headers_and_values(benchmark, status) + ) + benchmark_headers.extend(status_headers) + benchmark_values.extend(status_values) + + # Add extra fields + extras_headers, extras_values = ( + self._get_benchmark_extras_headers_and_values(benchmark) + ) + benchmark_headers.extend(extras_headers) + benchmark_values.extend(extras_values) + + if not headers: + headers = benchmark_headers + rows.append(benchmark_values) + + writer.writerow(headers) + for row in rows: + writer.writerow(row) + + return output_path + + def _get_benchmark_desc_headers_and_values( + self, benchmark: GenerativeBenchmark + ) -> tuple[list[str], list[str | float]]: + """Get description headers and values for a benchmark.""" headers = [ - "Benchmark", + "Type", + "Run Id", + "Id", + "Name", "Start Time", "End Time", - "Duration (s)", - "Comp", - "Inc", - "Err", - "Comp", - "Inc", - "Err", - "Comp", - "Inc", - "Err", - "Comp", - "Inc", - "Err", - "Comp", - "Inc", - "Err", + "Duration", ] - rows = [] + values: list[str | float] = [ + benchmark.type_, + benchmark.run_id, + benchmark.id_, + str(benchmark.scheduler.strategy), + datetime.fromtimestamp(benchmark.start_time).strftime("%Y-%m-%d %H:%M:%S"), + datetime.fromtimestamp(benchmark.end_time).strftime("%Y-%m-%d %H:%M:%S"), + benchmark.duration, + ] + return headers, values - for benchmark in self.benchmarks: - rows.append( - [ - strategy_display_str(benchmark.args.strategy), - f"{datetime.fromtimestamp(benchmark.start_time).strftime('%H:%M:%S')}", - f"{datetime.fromtimestamp(benchmark.end_time).strftime('%H:%M:%S')}", - f"{(benchmark.end_time - benchmark.start_time):.1f}", - f"{benchmark.request_totals.successful:.0f}", - f"{benchmark.request_totals.incomplete:.0f}", - f"{benchmark.request_totals.errored:.0f}", - f"{benchmark.metrics.prompt_token_count.successful.mean:.1f}", - f"{benchmark.metrics.prompt_token_count.incomplete.mean:.1f}", - f"{benchmark.metrics.prompt_token_count.errored.mean:.1f}", - f"{benchmark.metrics.output_token_count.successful.mean:.1f}", - f"{benchmark.metrics.output_token_count.incomplete.mean:.1f}", - f"{benchmark.metrics.output_token_count.errored.mean:.1f}", - f"{benchmark.metrics.prompt_token_count.successful.total_sum:.0f}", - f"{benchmark.metrics.prompt_token_count.incomplete.total_sum:.0f}", - f"{benchmark.metrics.prompt_token_count.errored.total_sum:.0f}", - f"{benchmark.metrics.output_token_count.successful.total_sum:.0f}", - f"{benchmark.metrics.output_token_count.incomplete.total_sum:.0f}", - f"{benchmark.metrics.output_token_count.errored.total_sum:.0f}", - ] + def _get_benchmark_status_headers_and_values( + self, benchmark: GenerativeBenchmark, status: str + ) -> tuple[list[str], list[float | list[float]]]: + """Get status-based 
metrics headers and values for a benchmark.""" + headers = [f"{status.capitalize()} Requests"] + values = [getattr(benchmark.request_totals, status)] + + for metric in GenerativeMetrics.model_fields: + metric_headers, metric_values = self._get_benchmark_status_metrics_stats( + benchmark, status, metric ) + headers.extend(metric_headers) + values.extend(metric_values) - self.print_table( - headers=headers, rows=rows, title="Benchmarks Info", sections=sections - ) + return headers, values - def print_benchmarks_stats(self): - """ - Print out the benchmark statistics to the console including the requests per - second, request concurrency, output tokens per second, total tokens per second, - request latency, time to first token, inter token latency, and time per output - token for each benchmark. - """ - if not self.benchmarks: - raise ValueError( - "No benchmarks to print stats for. Please set benchmarks first." - ) + def _get_benchmark_status_metrics_stats( + self, benchmark: GenerativeBenchmark, status: str, metric: str + ) -> tuple[list[str], list[float | list[float]]]: + """Get statistical metrics for a specific status and metric.""" + status_display = status.capitalize() + metric_display = metric.replace("_", " ").capitalize() + status_dist_summary: StatusDistributionSummary = getattr( + benchmark.metrics, metric + ) + dist_summary: DistributionSummary = getattr(status_dist_summary, status) - sections = { - "Metadata": (0, 0), - "Request Stats": (1, 2), - "Out Tok/sec": (3, 3), - "Tot Tok/sec": (4, 4), - "Req Latency (sec)": (5, 7), - "TTFT (ms)": (8, 10), - "ITL (ms)": (11, 13), - "TPOT (ms)": (14, 16), - } headers = [ - "Benchmark", - "Per Second", - "Concurrency", - "mean", - "mean", - "mean", - "median", - "p99", - "mean", - "median", - "p99", - "mean", - "median", - "p99", - "mean", - "median", - "p99", + f"{status_display} {metric_display} mean", + f"{status_display} {metric_display} median", + f"{status_display} {metric_display} std dev", + ( + f"{status_display} {metric_display} " + "[min, 0.1, 1, 5, 10, 25, 75, 90, 95, 99, max]" + ), ] - rows = [] + values: list[float | list[float]] = [ + dist_summary.mean, + dist_summary.median, + dist_summary.std_dev, + [ + dist_summary.min, + dist_summary.percentiles.p001, + dist_summary.percentiles.p01, + dist_summary.percentiles.p05, + dist_summary.percentiles.p10, + dist_summary.percentiles.p25, + dist_summary.percentiles.p75, + dist_summary.percentiles.p90, + dist_summary.percentiles.p95, + dist_summary.percentiles.p99, + dist_summary.max, + ], + ] + return headers, values - for benchmark in self.benchmarks: - rows.append( - [ - strategy_display_str(benchmark.args.strategy), - f"{benchmark.metrics.requests_per_second.successful.mean:.2f}", - f"{benchmark.metrics.request_concurrency.successful.mean:.2f}", - f"{benchmark.metrics.output_tokens_per_second.successful.mean:.1f}", - f"{benchmark.metrics.tokens_per_second.successful.mean:.1f}", - f"{benchmark.metrics.request_latency.successful.mean:.2f}", - f"{benchmark.metrics.request_latency.successful.median:.2f}", - f"{benchmark.metrics.request_latency.successful.percentiles.p99:.2f}", - f"{benchmark.metrics.time_to_first_token_ms.successful.mean:.1f}", - f"{benchmark.metrics.time_to_first_token_ms.successful.median:.1f}", - f"{benchmark.metrics.time_to_first_token_ms.successful.percentiles.p99:.1f}", - f"{benchmark.metrics.inter_token_latency_ms.successful.mean:.1f}", - f"{benchmark.metrics.inter_token_latency_ms.successful.median:.1f}", - 
f"{benchmark.metrics.inter_token_latency_ms.successful.percentiles.p99:.1f}", - f"{benchmark.metrics.time_per_output_token_ms.successful.mean:.1f}", - f"{benchmark.metrics.time_per_output_token_ms.successful.median:.1f}", - f"{benchmark.metrics.time_per_output_token_ms.successful.percentiles.p99:.1f}", - ] + +@GenerativeBenchmarkerOutput.register("html") +class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput): + """HTML output formatter for benchmark results.""" + + DEFAULT_FILE: ClassVar[str] = "benchmarks.html" + + @classmethod + def validated_kwargs( + cls, output_path: str | Path | None, **_kwargs + ) -> dict[str, Any]: + new_kwargs = {} + if output_path is not None: + new_kwargs["output_path"] = ( + Path(output_path) if not isinstance(output_path, Path) else output_path ) + return new_kwargs - self.print_table( - headers=headers, - rows=rows, - title="Benchmarks Stats", - sections=sections, - ) + output_path: Path = Field(default_factory=lambda: Path.cwd()) - def print_full_report(self): + async def finalize(self, report: GenerativeBenchmarksReport) -> Path: """ - Print out the benchmark statistics to the console. - Temporarily enables the console if it's disabled. + Save the benchmark report as an HTML file. - Format: - - Metadata - - Info - - Stats + :param report: The completed benchmark report. + :return: Path to the saved HTML file. """ - orig_enabled = self.enabled - self.enabled = True - self.print_benchmarks_metadata() - self.print_benchmarks_info() - self.print_benchmarks_stats() - self.enabled = orig_enabled + import humps + + output_path = self.output_path + if output_path.is_dir(): + output_path = output_path / GenerativeBenchmarkerHTML.DEFAULT_FILE + output_path.parent.mkdir(parents=True, exist_ok=True) + + data_builder = UIDataBuilder(report.benchmarks) + data = data_builder.to_dict() + camel_data = humps.camelize(data) + + ui_api_data = {} + for key, value in camel_data.items(): + placeholder_key = f"window.{humps.decamelize(key)} = {{}};" + replacement_value = ( + f"window.{humps.decamelize(key)} = {json.dumps(value, indent=2)};\n" + ) + ui_api_data[placeholder_key] = replacement_value + + create_report(ui_api_data, output_path) + + return output_path diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py index ca25fc24..042179ba 100644 --- a/src/guidellm/benchmark/profile.py +++ b/src/guidellm/benchmark/profile.py @@ -1,20 +1,52 @@ -from collections.abc import Sequence -from typing import Literal, Optional, Union +""" +Benchmarking profile configurations for coordinating multi-strategy execution. + +Provides configurable profile abstractions for orchestrating sequential and +parallel execution of different scheduling strategies during benchmarking, +with automatic strategy generation and constraint management. + +Classes: + Profile: Abstract base for multi-strategy benchmarking profiles. + SynchronousProfile: Single synchronous strategy execution profile. + ConcurrentProfile: Fixed-concurrency strategy execution profile. + ThroughputProfile: Maximum throughput strategy execution profile. + AsyncProfile: Rate-based asynchronous strategy execution profile. + SweepProfile: Adaptive multi-strategy sweep execution profile. + +Type Aliases: + ProfileType: Literal type for supported profile configurations. 
+""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections.abc import Generator +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Literal, +) import numpy as np -from pydantic import Field, computed_field +from pydantic import Field, computed_field, field_serializer, field_validator -from guidellm.objects import StandardBaseModel from guidellm.scheduler import ( AsyncConstantStrategy, AsyncPoissonStrategy, ConcurrentStrategy, + Constraint, + ConstraintInitializer, + ConstraintsInitializerFactory, SchedulingStrategy, StrategyType, SynchronousStrategy, ThroughputStrategy, ) -from guidellm.settings import settings +from guidellm.utils import PydanticClassRegistryMixin + +if TYPE_CHECKING: + from guidellm.benchmark.objects import Benchmark __all__ = [ "AsyncProfile", @@ -24,386 +56,661 @@ "SweepProfile", "SynchronousProfile", "ThroughputProfile", - "create_profile", ] ProfileType = Literal["synchronous", "concurrent", "throughput", "async", "sweep"] -class Profile(StandardBaseModel): +class Profile( + PydanticClassRegistryMixin["type[Profile]"], + ABC, +): + """ + Abstract base for multi-strategy benchmarking execution profiles. + + Coordinates sequential execution of scheduling strategies with automatic + strategy generation, constraint management, and completion tracking for + comprehensive benchmarking workflows. + """ + + schema_discriminator: ClassVar[str] = "type_" + + @classmethod + def __pydantic_schema_base_type__(cls) -> type[Profile]: + if cls.__name__ == "Profile": + return cls + + return Profile + + @classmethod + def create( + cls, + rate_type: str, + rate: float | int | list[float | int] | None, + random_seed: int = 42, + **kwargs: Any, + ) -> Profile: + """ + Create a profile instance based on the specified type. + + :param rate_type: The type of profile to create. + :param rate: Rate parameter for profile configuration. + :param random_seed: Random seed for stochastic strategies. + :param kwargs: Additional arguments for profile configuration. + :return: Configured profile instance for the specified type. + :raises ValueError: If the profile type is not registered. + """ + profile_class: type[Profile] = cls.get_registered_object(rate_type) + resolved_kwargs = profile_class.resolve_args( + rate_type=rate_type, rate=rate, random_seed=random_seed, **kwargs + ) + + return profile_class(**resolved_kwargs) + + @classmethod + @abstractmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve and validate arguments for profile construction. + + :param rate_type: The type of the profile. + :param rate: Rate parameter for configuration. + :param random_seed: Random seed for stochastic strategies. + :param kwargs: Additional arguments to resolve. + :return: Dictionary of resolved arguments for profile construction. + """ + ... 
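    # --- Illustrative usage sketch (editorial note, not part of this patch) ---
    # The registry hooks above define the intended flow: ``Profile.create`` looks up
    # a registered subclass by ``rate_type`` and feeds its ``resolve_args`` output to
    # the constructor, while ``strategies_generator`` (defined below) drives execution
    # by yielding (strategy, constraints) pairs and receiving each completed Benchmark
    # back via ``send``. A caller could therefore look roughly like the following;
    # ``run_benchmark`` is a hypothetical helper standing in for the benchmarker's
    # execution step and is not part of this diff:
    #
    #     profile = Profile.create(rate_type="concurrent", rate=[2, 4, 8])
    #     generator = profile.strategies_generator()
    #     try:
    #         strategy, constraints = next(generator)
    #         while True:
    #             benchmark = run_benchmark(strategy, constraints)  # hypothetical
    #             strategy, constraints = generator.send(benchmark)
    #     except StopIteration:
    #         pass  # all strategies for this profile have completed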
+ type_: Literal["profile"] = Field( - description="The type of benchmarking profile to use.", + description="The type of benchmarking profile to use", ) - completed_strategies: int = Field( - default=0, - description="The number of scheduling strategies generated so far.", - ) - measured_rates: list[float] = Field( + completed_strategies: list[SchedulingStrategy] = Field( default_factory=list, - description=("The average rates measured for the strategies that have run."), + description="The strategies that have completed execution", ) - measured_concurrencies: list[float] = Field( - default_factory=list, - description=( - "The average concurrency measured for the strategies that have run." - ), + constraints: dict[str, Any | dict[str, Any] | ConstraintInitializer] | None = Field( + default=None, + description="Runtime constraints to apply during strategy execution", ) - def completed_strategy(self, average_rate: float, average_concurrency: float): - self.measured_rates.append(average_rate) - self.measured_concurrencies.append(average_concurrency) - self.completed_strategies += 1 - @computed_field # type: ignore[misc] @property def strategy_types(self) -> list[StrategyType]: - return [] + """ + :return: List of all strategy types expected to be executed or have been + executed in this profile. By default, this returns just the + completed strategies. + """ + return [strat.type_ for strat in self.completed_strategies] + + def strategies_generator( + self, + ) -> Generator[ + tuple[ + SchedulingStrategy | None, + dict[str, Any | dict[str, Any] | Constraint] | None, + ], + Benchmark | None, + None, + ]: + """ + Generate strategies and constraints for sequential profile execution. + + :return: Generator yielding (strategy, constraints) tuples and + receiving benchmark results from each execution. + """ + prev_strategy: SchedulingStrategy | None = None + prev_benchmark: Benchmark | None = None + + while ( + strategy := self.next_strategy(prev_strategy, prev_benchmark) + ) is not None: + constraints = self.next_strategy_constraints( + strategy, prev_strategy, prev_benchmark + ) + prev_benchmark = yield ( + strategy, + constraints, + ) + prev_strategy = strategy + self.completed_strategies.append(prev_strategy) + + @abstractmethod + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> SchedulingStrategy | None: + """ + Generate the next strategy to execute in the profile sequence. + + :param prev_strategy: The previously completed strategy. + :param prev_benchmark: Benchmark results from the previous strategy. + :return: Next strategy to execute, or None if profile is complete. + """ + ... + + def next_strategy_constraints( + self, + next_strategy: SchedulingStrategy | None, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> dict[str, Any | dict[str, Any] | Constraint] | None: + """ + Generate constraints for the next strategy execution. + + :param next_strategy: The next strategy to be executed. + :param prev_strategy: The previously completed strategy. + :param prev_benchmark: Benchmark results from the previous strategy. + :return: Constraints dictionary for the next strategy, or None. 
+ """ + _ = (prev_strategy, prev_benchmark) # unused + return ( + ConstraintsInitializerFactory.resolve(self.constraints) + if next_strategy and self.constraints + else None + ) - def next_strategy(self) -> Optional[SchedulingStrategy]: - return None + @field_validator("constraints", mode="before") + @classmethod + def _constraints_validator( + cls, value: Any + ) -> dict[str, Any | dict[str, Any] | ConstraintInitializer] | None: + if value is None: + return None + if not isinstance(value, dict): + raise ValueError("Constraints must be a dictionary") + return { + key: ( + val + if not isinstance(val, ConstraintInitializer) + else ConstraintsInitializerFactory.deserialize(initializer_dict=val) + ) + for key, val in value.items() + } + + @field_serializer + def _constraints_serializer( + self, + constraints: dict[str, Any | dict[str, Any] | ConstraintInitializer] | None, + ) -> dict[str, Any | dict[str, Any]] | None: + if constraints is None: + return None + + return { + key: ( + val + if not isinstance(val, ConstraintInitializer) + else ConstraintsInitializerFactory.serialize(initializer=val) + ) + for key, val in constraints.items() + } + + +@Profile.register("synchronous") class SynchronousProfile(Profile): + """Single synchronous strategy execution profile.""" + type_: Literal["synchronous"] = "synchronous" # type: ignore[assignment] + @classmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve arguments for synchronous profile construction. + + :param rate_type: The type/strategy of the profile (ignored). + :param rate: Rate parameter (must be None, will be stripped). + :param random_seed: Random seed (ignored and stripped). + :param kwargs: Additional arguments to pass through. + :return: Dictionary of resolved arguments. + :raises ValueError: If rate is not None. + """ + _ = (rate_type, random_seed) # unused + if rate is not None: + raise ValueError("SynchronousProfile does not accept a rate parameter") + + return kwargs + @property def strategy_types(self) -> list[StrategyType]: + """ + :return: The single synchronous strategy type. + """ return [self.type_] - def next_strategy(self) -> Optional[SchedulingStrategy]: - if self.completed_strategies >= 1: + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> SynchronousStrategy | None: + """ + Generate synchronous strategy or None if already completed. + + :param prev_strategy: The previously completed strategy (unused). + :param prev_benchmark: Benchmark results from the previous strategy (unused). + :return: SynchronousStrategy for the first execution, None afterward. + """ + _ = (prev_strategy, prev_benchmark) # unused + if len(self.completed_strategies) >= 1: return None return SynchronousStrategy() - @staticmethod - def from_standard_args( - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - **kwargs, - ) -> "SynchronousProfile": - if rate_type != "synchronous": - raise ValueError("Rate type must be 'synchronous' for synchronous profile.") - - if rate is not None: - raise ValueError( - "Rate does not apply to synchronous profile, it must be set to None." - ) - - if kwargs: - raise ValueError( - "No additional arguments are allowed for synchronous profile." 
- ) - - return SynchronousProfile() - +@Profile.register("concurrent") class ConcurrentProfile(Profile): + """Fixed-concurrency strategy execution profile with configurable stream counts.""" + type_: Literal["concurrent"] = "concurrent" # type: ignore[assignment] - streams: Union[int, Sequence[int]] = Field( - description="The number of concurrent streams to use.", + streams: int | list[int] = Field( + description="Number of concurrent streams for request scheduling", + gt=0, + ) + startup_duration: float = Field( + default=0.0, + description=( + "Duration in seconds for distributing startup requests " + "before completion-based timing" + ), + ge=0, ) + @classmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve arguments for concurrent profile construction. + + :param rate_type: The type/strategy of the profile (ignored). + :param rate: Rate parameter, remapped to streams. + :param random_seed: Random seed (ignored and stripped). + :param kwargs: Additional arguments to pass through. + :return: Dictionary of resolved arguments. + :raises ValueError: If rate is None. + """ + _ = (rate_type, random_seed) # unused + kwargs["streams"] = rate + return kwargs + @property def strategy_types(self) -> list[StrategyType]: - num_strategies = len(self.streams) if isinstance(self.streams, Sequence) else 1 - + """Get concurrent strategy types for each configured stream count.""" + num_strategies = len(self.streams) if isinstance(self.streams, list) else 1 return [self.type_] * num_strategies - def next_strategy(self) -> Optional[SchedulingStrategy]: - streams = self.streams if isinstance(self.streams, Sequence) else [self.streams] - - if self.completed_strategies >= len(streams): + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> ConcurrentStrategy | None: + """ + Generate concurrent strategy for the next stream count. + + :param prev_strategy: The previously completed strategy (unused). + :param prev_benchmark: Benchmark results from the previous strategy (unused). + :return: ConcurrentStrategy with next stream count, or None if complete. + """ + _ = (prev_strategy, prev_benchmark) # unused + streams = self.streams if isinstance(self.streams, list) else [self.streams] + + if len(self.completed_strategies) >= len(streams): return None return ConcurrentStrategy( - streams=streams[self.completed_strategies], + streams=streams[len(self.completed_strategies)], + startup_duration=self.startup_duration, ) - @staticmethod - def from_standard_args( - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - **kwargs, - ) -> "ConcurrentProfile": - if rate_type != "concurrent": - raise ValueError("Rate type must be 'concurrent' for concurrent profile.") - - if not rate: - raise ValueError("Rate (streams) must be provided for concurrent profile.") - - if not isinstance(rate, Sequence): - rate = [rate] - - if not all(stream.is_integer() and stream > 0 for stream in rate): - raise ValueError( - f"All rate values (streams) must be positive integers, received {rate}" - ) - - if kwargs: - raise ValueError( - "No additional arguments are allowed for concurrent profile." - ) - - return ConcurrentProfile(streams=[int(rat) for rat in rate]) - +@Profile.register("throughput") class ThroughputProfile(Profile): + """ + Maximum throughput strategy execution profile with optional concurrency limits. 
+ """ + type_: Literal["throughput"] = "throughput" # type: ignore[assignment] - max_concurrency: Optional[int] = Field( + max_concurrency: int | None = Field( default=None, - description="The maximum number of concurrent requests that can be scheduled.", + description="Maximum number of concurrent requests to schedule", + gt=0, + ) + startup_duration: float = Field( + default=0.0, + description=( + "Duration in seconds for distributing startup requests " + "before full throughput scheduling" + ), + ge=0, ) + @classmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve arguments for throughput profile construction. + + :param rate_type: The type/strategy of the profile (ignored). + :param rate: Rate parameter to remap to max_concurrency. + :param random_seed: Random seed (ignored and stripped). + :param kwargs: Additional arguments to pass through. + :return: Dictionary of resolved arguments. + """ + _ = (rate_type, random_seed) # unused + # Remap rate to max_concurrency, strip out random_seed + kwargs.pop("random_seed", None) + if rate is not None: + kwargs["max_concurrency"] = rate + return kwargs + @property def strategy_types(self) -> list[StrategyType]: + """Get the single throughput strategy type.""" return [self.type_] - def next_strategy(self) -> Optional[SchedulingStrategy]: - if self.completed_strategies >= 1: + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> ThroughputStrategy | None: + """ + Generate throughput strategy or None if already completed. + + :param prev_strategy: The previously completed strategy (unused). + :param prev_benchmark: Benchmark results from the previous strategy (unused). + :return: ThroughputStrategy for the first execution, None afterward. + """ + _ = (prev_strategy, prev_benchmark) # unused + if len(self.completed_strategies) >= 1: return None return ThroughputStrategy( max_concurrency=self.max_concurrency, + startup_duration=self.startup_duration, ) - @staticmethod - def from_standard_args( - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - **kwargs, - ) -> "ThroughputProfile": - if rate_type != "throughput": - raise ValueError("Rate type must be 'throughput' for throughput profile.") - - if rate is not None: - raise ValueError( - "Rate does not apply to throughput profile, it must be set to None." - ) - return ThroughputProfile(**kwargs) +@Profile.register(["async", "constant", "poisson"]) +class AsyncProfile(Profile): + """ + Rate-based asynchronous strategy execution profile with configurable patterns. + """ - -class AsyncProfile(ThroughputProfile): - type_: Literal["async"] = "async" # type: ignore[assignment] + type_: Literal["async", "constant", "poisson"] = "async" # type: ignore[assignment] strategy_type: Literal["constant", "poisson"] = Field( - description="The type of asynchronous strategy to use.", + description="Type of asynchronous strategy pattern to use", ) - rate: Union[float, Sequence[float]] = Field( - description="The rate of requests per second to use.", + rate: float | list[float] = Field( + description="Request scheduling rate in requests per second", + gt=0, ) - initial_burst: bool = Field( - default=True, + startup_duration: float = Field( + default=0.0, description=( - "True to send an initial burst of requests (math.floor(self.rate)) " - "to reach target rate. 
False to not send an initial burst." + "Duration in seconds for distributing startup requests " + "to converge quickly to desired rate" ), + ge=0, + ) + max_concurrency: int | None = Field( + default=None, + description="Maximum number of concurrent requests to schedule", + gt=0, ) random_seed: int = Field( default=42, - description=( - "The random seed to use for the asynchronous strategy. " - "This is used to generate random numbers for the Poisson strategy." - ), + description="Random seed for Poisson distribution strategy", ) + @classmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve arguments for async profile construction. + + :param rate_type: The type/strategy of the profile. + :param rate: Rate parameter for the profile. + :param random_seed: Random seed for stochastic strategies. + :param kwargs: Additional arguments to pass through. + :return: Dictionary of resolved arguments. + :raises ValueError: If rate is None. + """ + if rate is None: + raise ValueError("AsyncProfile requires a rate parameter") + + kwargs["type_"] = ( + rate_type + if rate_type in ["async", "constant", "poisson"] + else kwargs.get("type_", "async") + ) + kwargs["strategy_type"] = ( + rate_type + if rate_type in ["constant", "poisson"] + else kwargs.get("strategy_type", "constant") + ) + kwargs["rate"] = rate + kwargs["random_seed"] = random_seed + return kwargs + @property def strategy_types(self) -> list[StrategyType]: - num_strategies = len(self.rate) if isinstance(self.rate, Sequence) else 1 - + """Get async strategy types for each configured rate.""" + num_strategies = len(self.rate) if isinstance(self.rate, list) else 1 return [self.strategy_type] * num_strategies - def next_strategy(self) -> Optional[SchedulingStrategy]: - rate = self.rate if isinstance(self.rate, Sequence) else [self.rate] - - if self.completed_strategies >= len(rate): + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> AsyncConstantStrategy | AsyncPoissonStrategy | None: + """ + Generate async strategy for the next configured rate. + + :param prev_strategy: The previously completed strategy (unused). + :param prev_benchmark: Benchmark results from the previous strategy (unused). + :return: AsyncConstantStrategy or AsyncPoissonStrategy for next rate, + or None if all rates completed. + :raises ValueError: If strategy_type is neither 'constant' nor 'poisson'. 
+ """ + _ = (prev_strategy, prev_benchmark) # unused + rate = self.rate if isinstance(self.rate, list) else [self.rate] + + if len(self.completed_strategies) >= len(rate): return None + current_rate = rate[len(self.completed_strategies)] + if self.strategy_type == "constant": return AsyncConstantStrategy( - rate=rate[self.completed_strategies], - initial_burst=self.initial_burst, + rate=current_rate, + startup_duration=self.startup_duration, max_concurrency=self.max_concurrency, ) elif self.strategy_type == "poisson": return AsyncPoissonStrategy( - rate=rate[self.completed_strategies], - initial_burst=self.initial_burst, + rate=current_rate, + startup_duration=self.startup_duration, max_concurrency=self.max_concurrency, random_seed=self.random_seed, ) else: raise ValueError(f"Invalid strategy type: {self.strategy_type}") - @staticmethod - def from_standard_args( # type: ignore[override] - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - random_seed: int, - **kwargs, - ) -> "AsyncProfile": - if rate_type not in ("async", "constant", "poisson"): - raise ValueError( - "Rate type must be in ('async', 'constant', 'poisson') " - f"for async profile. Received: {rate_type}" - ) - - if not rate: - raise ValueError("Rate must be provided for async profile.") - - if not isinstance(rate, Sequence): - rate = [rate] - - if not all(isinstance(r, (float, int)) and r > 0 for r in rate): - raise ValueError( - f"All rate values must be positive numbers, received {rate}" - ) - - if rate_type == "async": - rate_type = "constant" # default to constant if not specified - return AsyncProfile( - strategy_type=rate_type, # type: ignore[arg-type] - rate=rate, - random_seed=random_seed, - **kwargs, - ) +@Profile.register("sweep") +class SweepProfile(Profile): + """ + Adaptive multi-strategy sweep execution profile with rate discovery. + """ - -class SweepProfile(AsyncProfile): type_: Literal["sweep"] = "sweep" # type: ignore[assignment] sweep_size: int = Field( - description="The number of strategies to generate for the sweep.", + description="Number of strategies to generate for the sweep", + ge=2, + ) + strategy_type: Literal["constant", "poisson"] = "constant" + startup_duration: float = Field( + default=0.0, + description=( + "Duration in seconds for distributing startup requests " + "to converge quickly to desired rate" + ), + ge=0, + ) + max_concurrency: int | None = Field( + default=None, + description="Maximum number of concurrent requests to schedule", + gt=0, ) - rate: float = -1 - rate_type: Literal["constant", "poisson"] = "constant" + random_seed: int = Field( + default=42, + description="Random seed for Poisson distribution strategy", + ) + synchronous_rate: float = Field( + default=-1.0, + description="Measured rate from synchronous strategy execution", + ) + throughput_rate: float = Field( + default=-1.0, + description="Measured rate from throughput strategy execution", + ) + async_rates: list[float] = Field( + default_factory=list, + description="Generated rates for async strategy sweep", + ) + measured_rates: list[float] = Field( + default_factory=list, + description="Calculated interpolated rates between synchronous and throughput", + ) + + @classmethod + def resolve_args( + cls, + rate_type: str, + rate: float | int | list[float, int] | None, + random_seed: int, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Resolve arguments for sweep profile construction. + + :param rate_type: The type/strategy for async strategies in the sweep. 
+ :param rate: Rate parameter (ignored for sweep). + :param random_seed: Random seed for stochastic strategies. + :param kwargs: Additional arguments to pass through. + :return: Dictionary of resolved arguments. + """ + kwargs["sweep_size"] = kwargs.get("sweep_size", rate) + kwargs["random_seed"] = random_seed + if rate_type in ["constant", "poisson"]: + kwargs["strategy_type"] = rate_type + return kwargs @property def strategy_types(self) -> list[StrategyType]: - return ( - ["synchronous"] + ["throughput"] + [self.rate_type] * (self.sweep_size - 2) # type: ignore[return-value] - ) - - def next_strategy(self) -> Optional[SchedulingStrategy]: - if self.completed_strategies >= self.sweep_size: - return None - - if self.completed_strategies == 0: + """Get strategy types for the complete sweep sequence.""" + types = ["synchronous", "throughput"] + types += [self.strategy_type] * (self.sweep_size - len(types)) + return types + + def next_strategy( + self, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> ( + AsyncConstantStrategy + | AsyncPoissonStrategy + | SynchronousProfile + | ThroughputProfile + | None + ): + """ + Generate the next strategy in the adaptive sweep sequence. + + Executes synchronous and throughput strategies first to measure + baseline rates, then generates interpolated rates for async strategies. + + :param prev_strategy: The previously completed strategy. + :param prev_benchmark: Benchmark results from the previous strategy. + :return: Next strategy in sweep sequence, or None if complete. + :raises ValueError: If strategy_type is neither 'constant' nor 'poisson'. + """ + if prev_strategy is None: return SynchronousStrategy() - if self.completed_strategies == 1: + if prev_strategy.type_ == "synchronous": + self.synchronous_rate = ( + prev_benchmark.metrics.requests_per_second.successful.mean + ) + return ThroughputStrategy( max_concurrency=self.max_concurrency, + startup_duration=self.startup_duration, ) - min_rate = self.measured_rates[0] - max_rate = self.measured_rates[1] - rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:] + if prev_strategy.type_ == "throughput": + self.throughput_rate = ( + prev_benchmark.metrics.requests_per_second.successful.mean + ) + self.measured_rates = list( + np.linspace( + self.synchronous_rate, + self.throughput_rate, + self.sweep_size - 1, + ) + )[1:] # don't rerun synchronous - if self.rate_type == "constant": + if len(self.completed_strategies) >= self.sweep_size: + return None + + next_rate_index = len( + [ + strat + for strat in self.completed_strategies + if strat.type_ == self.strategy_type + ] + ) + + if self.strategy_type == "constant": return AsyncConstantStrategy( - rate=rates[self.completed_strategies - 2], - initial_burst=self.initial_burst, + rate=self.measured_rates[next_rate_index], + startup_duration=self.startup_duration, max_concurrency=self.max_concurrency, ) - elif self.rate_type == "poisson": + elif self.strategy_type == "poisson": return AsyncPoissonStrategy( - rate=rates[self.completed_strategies - 2], - initial_burst=self.initial_burst, + rate=self.measured_rates[next_rate_index], + startup_duration=self.startup_duration, max_concurrency=self.max_concurrency, + random_seed=self.random_seed, ) else: - raise ValueError(f"Invalid strategy type: {self.rate_type}") - - @staticmethod - def from_standard_args( # type: ignore[override] - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - random_seed: int, - **kwargs, - ) -> 
"SweepProfile": - if rate_type != "sweep": - raise ValueError("Rate type must be 'sweep' for sweep profile.") - - if "sweep_size" in kwargs: - raise ValueError("Sweep size must not be provided, use rate instead.") - - if isinstance(rate, Sequence): - if len(rate) != 1: - raise ValueError( - "Rate must be a single value for sweep profile, received " - f"{len(rate)} values." - ) - rate = rate[0] - - if not rate: - rate = settings.default_sweep_number - - if not rate: - raise ValueError( - "Rate (sweep_size) must be provided for concurrent profile." - ) - - if ( - not isinstance(rate, (int, float)) - or (isinstance(rate, float) and not rate.is_integer()) - or rate <= 1 - ): - raise ValueError( - f"Rate (sweep_size) must be a positive integer > 1, received {rate} " - f"with type {type(rate)}" - ) - - if not kwargs: - kwargs = {} - - if "strategy_type" not in kwargs: - kwargs["strategy_type"] = "constant" - - return SweepProfile(sweep_size=int(rate), random_seed=random_seed, **kwargs) - - -def create_profile( - rate_type: Union[StrategyType, ProfileType], - rate: Optional[Union[float, Sequence[float]]], - random_seed: int = 42, - **kwargs, -) -> "Profile": - if rate_type == "synchronous": - return SynchronousProfile.from_standard_args( - rate_type=rate_type, - rate=rate, - **kwargs, - ) - - if rate_type == "concurrent": - return ConcurrentProfile.from_standard_args( - rate_type=rate_type, - rate=rate, - **kwargs, - ) - - if rate_type == "throughput": - return ThroughputProfile.from_standard_args( - rate_type=rate_type, - rate=rate, - **kwargs, - ) - - if rate_type in ("async", "constant", "poisson"): - return AsyncProfile.from_standard_args( - rate_type=rate_type, - rate=rate, - random_seed=random_seed, - **kwargs, - ) - - if rate_type == "sweep": - return SweepProfile.from_standard_args( - rate_type=rate_type, - rate=rate, - random_seed=random_seed, - **kwargs, - ) - - raise ValueError(f"Invalid profile type: {rate_type}") + raise ValueError(f"Invalid strategy type: {self.strategy_type}") diff --git a/src/guidellm/benchmark/progress.py b/src/guidellm/benchmark/progress.py index 1232107b..f93b3a83 100644 --- a/src/guidellm/benchmark/progress.py +++ b/src/guidellm/benchmark/progress.py @@ -1,8 +1,27 @@ -import math -import time +""" +Benchmark progress tracking and console display abstractions. + +Provides progress tracking interfaces and implementations for monitoring benchmark +execution, displaying real-time statistics, and managing UI updates during +generative benchmarking operations. + +Classes: + BenchmarkerProgress: Abstract base for benchmark progress tracking. + BenchmarkerProgressGroup: Composite progress handler for multiple instances. + GenerativeConsoleBenchmarkerProgress: Console-based progress display. + +Type Variables: + BenchmarkT: Generic benchmark object type. 
+""" + +from __future__ import annotations + +import asyncio +from abc import ABC, abstractmethod +from collections.abc import AsyncIterable, AsyncIterator, Iterable from dataclasses import dataclass from datetime import datetime -from typing import Generic, Optional, TypeVar, Union +from typing import Any, Generic, Literal from rich.console import Group from rich.live import Live @@ -10,7 +29,6 @@ from rich.progress import ( BarColumn, Progress, - ProgressColumn, SpinnerColumn, TaskID, TaskProgressColumn, @@ -19,145 +37,631 @@ TimeRemainingColumn, ) -from guidellm.benchmark.aggregator import ( - BenchmarkAggregator, - GenerativeBenchmarkAggregator, -) -from guidellm.benchmark.benchmark import Benchmark, GenerativeBenchmark -from guidellm.benchmark.benchmarker import BenchmarkerResult +from guidellm.benchmark.aggregator import AggregatorState +from guidellm.benchmark.objects import BenchmarkT, GenerativeBenchmark +from guidellm.benchmark.profile import Profile from guidellm.scheduler import ( + SchedulerState, SchedulingStrategy, StrategyType, - strategy_display_str, ) -from guidellm.utils import Colors +from guidellm.utils import Colors, format_value_display __all__ = [ - "BenchmarkerProgressDisplay", - "BenchmarkerTaskProgressState", - "GenerativeTextBenchmarkerProgressDisplay", - "GenerativeTextBenchmarkerTaskProgressState", + "BenchmarkerProgress", + "BenchmarkerProgressGroup", + "GenerativeConsoleBenchmarkerProgress", ] -@dataclass -class BenchmarkerTaskProgressState: - display_scheduler_stats: bool - - task_id: TaskID - strategy: Union[StrategyType, SchedulingStrategy] - started: bool = False - compiling: bool = False - ended: bool = False - - start_time: Optional[float] = None - max_number: Optional[float] = None - max_duration: Optional[float] = None - in_warmup: bool = False - in_cooldown: bool = False - - requests_rate: float = 0 - request_latency: float = 0 - requests_processing: float = 0 - requests_successful: float = 0 - requests_incomplete: float = 0 - requests_errored: float = 0 +class BenchmarkerProgress(Generic[BenchmarkT], ABC): + """ + Abstract base class for tracking and displaying benchmark progress. + + Provides lifecycle hooks for monitoring benchmark execution stages including + initialization, start, updates, completion, and finalization. Supports + enable/disable functionality for conditional progress tracking. + """ + + def __init__(self, enabled: bool = True): + """ + Initialize progress tracker. - worker_overheads_time_ms: float = 0.0 - backend_overheads_time_ms: float = 0.0 - requests_sleep_time_ms: float = 0.0 - requests_targeted_start_time_delay_ms: float = 0.0 + :param enabled: Whether to enable progress tracking and display. + """ + self._enabled = enabled + self.profile: Profile = None + self.current_strategy: SchedulingStrategy = None @property - def description(self) -> str: - return strategy_display_str(self.strategy) + def enabled(self) -> bool: + """ + :return: Whether progress tracking is currently enabled. + """ + return self._enabled + + @enabled.setter + def enabled(self, value: bool) -> None: + """ + :param value: True to enable progress tracking, False to disable. + :raises RuntimeError: If called after progress run has started. 
+ """ + if self.profile is not None: + raise RuntimeError( + "Cannot change enabled state after __call__ for progress run" + ) + + self._enabled = value + + def __call__( + self, + profile: Profile, + agen: AsyncIterable[ + tuple[ + AggregatorState | None, + BenchmarkT | None, + SchedulingStrategy, + SchedulerState | None, + ] + ], + ) -> AsyncIterator[ + tuple[ + AggregatorState | None, + BenchmarkT | None, + SchedulingStrategy, + SchedulerState | None, + ] + ]: + """ + Track progress through benchmark execution pipeline. + + Wraps the provided async generator to monitor benchmark progress, + calling appropriate lifecycle hooks based on execution state. + + :param profile: Benchmark profile configuration. + :param agen: Async generator yielding benchmark execution updates. + :return: Async iterator forwarding original updates with progress tracking. + """ + + async def aiterator() -> AsyncIterator[ + tuple[ + AggregatorState | None, + BenchmarkT | None, + SchedulingStrategy, + SchedulerState | None, + ] + ]: + self.profile = profile + if self.enabled: + await self.on_initialize(profile) + + async for aggregator_update, benchmark, strategy, scheduler_state in agen: + if self.enabled: + await self.on_raw_update( + profile, + aggregator_update, + benchmark, + strategy, + scheduler_state, + ) + + if self.current_strategy != strategy: + self.current_strategy = strategy + await self.on_benchmark_start(strategy) + elif benchmark is not None: + await self.on_benchmark_complete(benchmark) + self.current_strategy = None + else: + await self.on_benchmark_update( + aggregator_update, scheduler_state + ) + + yield aggregator_update, benchmark, strategy, scheduler_state + + if self.enabled: + await self.on_finalize() + + return aiterator() + + @abstractmethod + async def on_initialize(self, profile: Profile): + """ + Initialize progress tracking for benchmark profile. + + :param profile: Benchmark profile configuration. + """ + + @abstractmethod + async def on_benchmark_start(self, strategy: SchedulingStrategy): + """ + Handle start of new benchmark strategy execution. + + :param strategy: Scheduling strategy being executed. + """ + + @abstractmethod + async def on_benchmark_update( + self, aggregator_update: AggregatorState, scheduler_state: SchedulerState + ): + """ + Handle benchmark execution progress update. + + :param aggregator_update: Current benchmark metrics and statistics. + :param scheduler_state: Current scheduler execution state. + """ + + @abstractmethod + async def on_benchmark_complete(self, benchmark: BenchmarkT): + """ + Handle completion of benchmark strategy execution. + + :param benchmark: Completed benchmark results. + """ + + @abstractmethod + async def on_finalize(self): + """Finalize progress tracking and cleanup resources.""" + + async def on_raw_update( + self, + profile: Profile, + aggregator_update: AggregatorState | None, + benchmark: BenchmarkT | None, + strategy: SchedulingStrategy, + scheduler_state: SchedulerState | None, + ): + """ + Handle raw benchmark execution update. + + Optional hook for accessing all execution state updates. Default + implementation does nothing. + + :param profile: Benchmark profile configuration. + :param aggregator_update: Current benchmark metrics and statistics. + :param benchmark: Completed benchmark if available. + :param strategy: Current scheduling strategy. + :param scheduler_state: Current scheduler execution state. 
+ """ + + +class BenchmarkerProgressGroup(BenchmarkerProgress[BenchmarkT]): + """ + Composite progress handler that manages multiple progress instances. + + Distributes progress events to all contained progress instances, enabling + parallel progress tracking through multiple channels (e.g., console display + and file logging). + + :param instances: Collection of progress handlers to manage. + :param enabled: Whether the group is active. + """ + + def __init__( + self, + instances: ( + Iterable[BenchmarkerProgress[BenchmarkT]] + | list[BenchmarkerProgress[BenchmarkT]] + ), + enabled: bool = True, + ): + """ + Initialize progress group with handler instances. + + :param instances: Progress handler instances to coordinate. + :param enabled: Whether to enable the progress group. + """ + self.instances: list[BenchmarkerProgress[BenchmarkT]] = list(instances) + super().__init__(enabled=enabled) @property - def total(self) -> Optional[float]: - if self.max_number is None and self.max_duration is None: - return None + def enabled(self) -> bool: + """Whether the progress group is currently enabled.""" + return self._enabled + + @enabled.setter + def enabled(self, value: bool): + """ + Set enabled state for group and all contained instances. + + :param value: New enabled state. + """ + self._enabled = value + for instance in self.instances: + instance.enabled = value - return 1000 + async def on_initialize(self, profile: Profile): + """ + Initialize all progress handler instances. + + :param profile: Benchmark profile configuration. + """ + await asyncio.gather( + *[child.on_initialize(profile) for child in self.instances] + ) + + async def on_benchmark_start(self, strategy: SchedulingStrategy): + """ + Notify all handlers of benchmark strategy start. + + :param strategy: Scheduling strategy being executed. + """ + await asyncio.gather( + *[child.on_benchmark_start(strategy) for child in self.instances] + ) + + async def on_benchmark_update( + self, aggregator_update: AggregatorState, scheduler_state: SchedulerState + ): + """ + Distribute benchmark updates to all handlers. + + :param aggregator_update: Current benchmark metrics and statistics. + :param scheduler_state: Current scheduler execution state. + """ + await asyncio.gather( + *[ + child.on_benchmark_update(aggregator_update, scheduler_state) + for child in self.instances + ] + ) + + async def on_benchmark_complete(self, benchmark: BenchmarkT): + """ + Notify all handlers of benchmark completion. + + :param benchmark: Completed benchmark results. + """ + await asyncio.gather( + *[child.on_benchmark_complete(benchmark) for child in self.instances] + ) + + async def on_finalize(self): + """Finalize all progress handler instances.""" + await asyncio.gather(*[child.on_finalize() for child in self.instances]) + + async def on_raw_update( + self, + profile: Profile, + aggregator_update: AggregatorState | None, + benchmark: BenchmarkT | None, + strategy: SchedulingStrategy, + scheduler_state: SchedulerState | None, + ): + """ + Distribute raw updates to all handlers. + + :param profile: Benchmark profile configuration. + :param aggregator_update: Current benchmark metrics and statistics. + :param benchmark: Completed benchmark if available. + :param strategy: Current scheduling strategy. + :param scheduler_state: Current scheduler execution state. 
+ """ + await asyncio.gather( + *[ + child.on_raw_update( + profile, + aggregator_update, + benchmark, + strategy, + scheduler_state, + ) + for child in self.instances + ] + ) + + +class GenerativeConsoleBenchmarkerProgress( + BenchmarkerProgress[GenerativeBenchmark], Live +): + """ + Console-based progress display for generative benchmarks. + + Provides real-time visual progress tracking using Rich library components, + displaying benchmark execution statistics, timing information, and progress + bars in a structured console interface. + """ + + def __init__(self, enabled: bool = True, display_scheduler_stats: bool = False): + """ + Initialize console progress display. + + :param enabled: Whether to enable progress tracking and display. + :param display_scheduler_stats: Whether to display scheduler statistics. + """ + BenchmarkerProgress.__init__(self, enabled=enabled) + Live.__init__( + self, + refresh_per_second=4, + auto_refresh=True, + redirect_stdout=True, + redirect_stderr=True, + ) + self.display_scheduler_stats: bool = display_scheduler_stats + self.run_progress: Progress = None + self.run_progress_task: TaskID = None + self.tasks_progress: _GenerativeProgressTasks = None + + async def on_initialize(self, profile: Profile): + """ + Initialize console display components and start rendering. + + :param profile: Benchmark profile configuration. + """ + self.tasks_progress = _GenerativeProgressTasks( + profile=profile, display_scheduler_stats=self.display_scheduler_stats + ) + self.run_progress = Progress( + TextColumn("Generating...", style=f"italic {Colors.progress}"), + BarColumn( + bar_width=None, + complete_style=Colors.progress, + finished_style=Colors.success, + ), + TextColumn( + "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})", + style=Colors.progress, + ), + TextColumn("["), + TimeElapsedColumn(), + TextColumn("<"), + TimeRemainingColumn(), + TextColumn("]"), + ) + self.run_progress_task = self.run_progress.add_task("") + self._sync_run_progress() + self.update( + Group( + Panel( + self.tasks_progress, + title="Benchmarks", + title_align="left", + expand=True, + ), + self.run_progress, + ) + ) + self.start() + + async def on_benchmark_start(self, strategy: SchedulingStrategy): + """ + Update display for new benchmark strategy start. + + :param strategy: Scheduling strategy being executed. + """ + self.tasks_progress.start_benchmark(strategy) + self._sync_run_progress() + + async def on_benchmark_update( + self, aggregator_update: AggregatorState | None, scheduler_state: SchedulerState + ): + """ + Update display with current benchmark progress. + + :param aggregator_update: Current benchmark metrics and statistics. + :param scheduler_state: Current scheduler execution state. + """ + self.tasks_progress.update_benchmark(aggregator_update, scheduler_state) + self._sync_run_progress() + + async def on_benchmark_complete(self, benchmark: GenerativeBenchmark): + """ + Update display for completed benchmark. + + :param benchmark: Completed benchmark results. 
+ """ + self.tasks_progress.complete_benchmark(benchmark) + self._sync_run_progress() + + async def on_finalize(self): + """Stop display rendering and cleanup resources.""" + self.tasks_progress.finalize() + self._sync_run_progress() + self.run_progress.stop_task(self.run_progress_task) + self.stop() + self.run_progress = None + self.run_progress_task = None + self.tasks_progress = None + + def _sync_run_progress(self): + """Synchronize overall progress display with task progress.""" + self.run_progress.update( + self.run_progress_task, + total=self.tasks_progress.steps_total, + completed=self.tasks_progress.steps_progress, + completed_benchmarks=self.tasks_progress.tasks_progress, + total_benchmarks=self.tasks_progress.tasks_total, + ) + + +# Scaling factor for progress calculations to provide granular progress updates +_PROGRESS_SCALE = 1000 + + +class _GenerativeProgressTasks(Progress): + def __init__(self, profile: Profile, display_scheduler_stats: bool): + self.profile: Profile = profile + self.display_scheduler_stats: bool = display_scheduler_stats + self.benchmark_task_states: list[_GenerativeProgressTaskState] = [] + self.current_index: int = -1 + + summary_text = "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}" + if self.display_scheduler_stats: + summary_text += "\n{task.fields[scheduler_stats]}" + super().__init__( + TextColumn("[{task.fields[start_time]}]"), + SpinnerColumn(style=Colors.progress), + TaskProgressColumn(style=Colors.progress), + TextColumn("{task.description}"), + TextColumn("({task.fields[progress_status]})"), + TextColumn(" "), + TextColumn(summary_text), + ) + + for strategy_type in profile.strategy_types: + task_state = _GenerativeProgressTaskState( + strategy_type=strategy_type, + ) + task_id = self.add_task(**task_state.current) + task_state.task_id = task_id + self.benchmark_task_states.append(task_state) @property - def completed(self) -> int: - if self.ended: - return 1000 + def tasks_total(self) -> int: + return len(self.benchmark_task_states) - if self.max_number is None and self.max_duration is None: - return 0 + @property + def tasks_progress(self) -> int: + return self.current_index + 1 - number = self.requests_successful + self.requests_errored - number_percent = ( - number / float(self.max_number) * 1000 if self.max_number else -math.inf + @property + def steps_total(self) -> int: + return _PROGRESS_SCALE * len(self.benchmark_task_states) + + @property + def steps_progress(self) -> int: + progress_current_task = ( + self.benchmark_task_states[self.current_index].progress + if self.current_index < len(self.benchmark_task_states) + else 0 + ) + progress_total = self.current_index + (progress_current_task or 0) + + return progress_total * _PROGRESS_SCALE + + def start_benchmark(self, strategy: SchedulingStrategy): + self.current_index += 1 + if self.current_index >= len(self.benchmark_task_states): + # New task past initially estimated, append it to the end + task_state = _GenerativeProgressTaskState(strategy_type=strategy.type_) + task_id = self.add_task(**task_state.current) + task_state.task_id = task_id + self.benchmark_task_states.append(task_state) + + self.benchmark_task_states[self.current_index].start(strategy) + self.update( + self.benchmark_task_states[self.current_index].task_id, + start=True, + **self.benchmark_task_states[self.current_index].current, + ) + + def update_benchmark( + self, aggregator_update: AggregatorState, scheduler_state: SchedulerState + ): + self.benchmark_task_states[self.current_index].update( + 
aggregator_update, scheduler_state + ) + self.update( + self.benchmark_task_states[self.current_index].task_id, + **self.benchmark_task_states[self.current_index].current, ) - duration_percent = ( - (time.time() - self.start_time) / self.max_duration * 1000 - if self.max_duration and self.start_time - else -math.inf + + def complete_benchmark(self, benchmark: GenerativeBenchmark): + self.benchmark_task_states[self.current_index].complete(benchmark) + self.update( + self.benchmark_task_states[self.current_index].task_id, + **self.benchmark_task_states[self.current_index].current, ) - return min(int(max(number_percent, duration_percent)), 1000) + def finalize(self): + self.stop() + + +@dataclass +class _GenerativeProgressTaskState: + strategy_type: StrategyType + task_id: TaskID = None + strategy: SchedulingStrategy | None = None + benchmark_status: Literal[ + "pending", "in_warmup", "in_progress", "in_cooldown", "completed" + ] = "pending" + progress: float | None = None + start_time: float = -1.0 + successful_requests: int = 0 + cancelled_requests: int = 0 + errored_requests: int = 0 + request_concurrency: int = 0 + requests_per_second: float = 0 + request_latency: float = 0 + output_tokens: int = 0 + output_tokens_rate: float = 0 + prompt_tokens: int = 0 + total_tokens_rate: float = 0 + time_to_first_token: float = 0 + inter_token_latency: float = 0 + queued_time: float = 0 + request_targeted_start_delay: float = 0 + scheduler_overheads_time: float = 0 @property - def fields(self) -> dict[str, str]: - fields = { + def current(self) -> dict[str, Any]: + return { "start_time": self.formatted_start_time, + "description": str(self.strategy or self.strategy_type), "progress_status": self.formatted_progress_status, "requests_summary": self.formatted_requests_summary, + "tokens_summary": self.formatted_tokens_summary, + "scheduler_stats": self.formatted_scheduler_stats, + "completed": self.completed, + "total": self.total, } - if self.display_scheduler_stats: - fields["scheduler_stats"] = self.formatted_scheduler_stats + @property + def completed(self) -> float: + if self.benchmark_status == "pending": + return 0 + + if self.benchmark_status == "completed": + return _PROGRESS_SCALE - return fields + return self.progress * _PROGRESS_SCALE if self.progress is not None else None + + @property + def total(self) -> float: + return _PROGRESS_SCALE @property def formatted_start_time(self) -> str: - if self.start_time is None: + if self.start_time < 0.0: return "--:--:--" return datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S") @property def formatted_progress_status(self) -> str: - if self.ended: - status = "complete" - color = Colors.SUCCESS - elif self.compiling: - status = "compiling" - color = Colors.PROGRESS - elif self.started and self.in_warmup: + if self.benchmark_status == "in_warmup": status = "warmup" - color = Colors.PROGRESS - elif self.started and self.in_cooldown: - status = "cooldown" - color = Colors.PROGRESS - elif self.started: + color = Colors.progress + elif self.benchmark_status == "in_progress": status = "running" - color = Colors.PROGRESS + color = Colors.progress + elif self.benchmark_status == "in_cooldown": + status = "cooldown" + color = Colors.progress + elif self.benchmark_status == "completed": + status = "complete" + color = Colors.success else: status = "pending" - color = Colors.INFO + color = Colors.info return f"[{color}]{status.ljust(8)}[/{color}]" @property def formatted_requests_summary(self) -> str: - if not self.started: + if self.benchmark_status == 
"pending": return " " return ( - f"[{Colors.INFO}]Req:[/{Colors.INFO}] " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_rate, + f"[{Colors.info}]Req:[/{Colors.info}] " + + format_value_display( + value=self.requests_per_second, label="req/s", total_characters=12, digits_places=4, decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( + + format_value_display( value=self.request_latency, label="Lat", units="s", @@ -166,32 +670,32 @@ def formatted_requests_summary(self) -> str: decimal_places=2, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_processing, + + format_value_display( + value=self.request_concurrency, label="Conc", total_characters=12, digits_places=4, decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_successful, + + format_value_display( + value=self.successful_requests, label="Comp", total_characters=12, digits_places=5, decimal_places=0, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_incomplete, + + format_value_display( + value=self.cancelled_requests, label="Inc", total_characters=12, digits_places=5, decimal_places=0, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_errored, + + format_value_display( + value=self.errored_requests, label="Err", total_characters=12, digits_places=5, @@ -199,101 +703,14 @@ def formatted_requests_summary(self) -> str: ) ) - @property - def formatted_scheduler_stats(self) -> str: - if not self.started: - return " " - - return ( - f"[{Colors.INFO}]Sys:[/{Colors.INFO}] " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.worker_overheads_time_ms, - label="Work OH", - units="ms", - total_characters=18, - digits_places=3, - decimal_places=1, - ) - + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.backend_overheads_time_ms, - label="Back OH", - units="ms", - total_characters=18, - digits_places=3, - decimal_places=1, - ) - + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_sleep_time_ms, - label="Req Sleep", - units="ms", - total_characters=18, - digits_places=5, - decimal_places=0, - ) - + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.requests_targeted_start_time_delay_ms, - label="Start Del", - units="ms", - total_characters=18, - digits_places=5, - decimal_places=0, - ) - ) - - @staticmethod - def format_progress_display( - value: float, - label: str, - units: str = "", - total_characters: Optional[int] = None, - digits_places: Optional[int] = None, - decimal_places: Optional[int] = None, - ) -> str: - if decimal_places is None and digits_places is None: - formatted_number = f"{value:.0f}" - elif digits_places is None: - formatted_number = f"{value:.{decimal_places}f}" - elif decimal_places is None: - formatted_number = f"{value:>{digits_places}f}" - else: - formatted_number = f"{value:>{digits_places}.{decimal_places}f}" - - result = f"{formatted_number}{units} [{Colors.INFO}]{label}[/{Colors.INFO}]" - - if total_characters is not None: - total_characters += len(Colors.INFO) * 2 + 5 - - if len(result) < total_characters: - result = result.rjust(total_characters) - - return result - - -class GenerativeTextBenchmarkerTaskProgressState(BenchmarkerTaskProgressState): - output_tokens: float = 0 - prompt_tokens: float = 0 - output_tokens_rate: float = 0 - total_tokens_rate: float = 0 - tokens_ttft: 
float = 0 - tokens_itl: float = 0 - - @property - def fields(self) -> dict[str, str]: - fields = super().fields - fields["tokens_summary"] = self.formatted_tokens_summary - return fields - @property def formatted_tokens_summary(self) -> str: - if not self.started: + if self.benchmark_status == "pending": return " " return ( - f"[{Colors.INFO}]Tok:[/{Colors.INFO}] " - + BenchmarkerTaskProgressState.format_progress_display( + f"[{Colors.info}]Tok:[/{Colors.info}] " + + format_value_display( value=self.output_tokens_rate, label="gen/s", total_characters=12, @@ -301,7 +718,7 @@ def formatted_tokens_summary(self) -> str: decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( + + format_value_display( value=self.total_tokens_rate, label="tot/s", total_characters=12, @@ -309,8 +726,8 @@ def formatted_tokens_summary(self) -> str: decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.tokens_ttft, + + format_value_display( + value=self.time_to_first_token, label="TTFT", units="ms", total_characters=12, @@ -318,8 +735,8 @@ def formatted_tokens_summary(self) -> str: decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( - value=self.tokens_itl, + + format_value_display( + value=self.inter_token_latency, label="ITL", units="ms", total_characters=12, @@ -327,7 +744,7 @@ def formatted_tokens_summary(self) -> str: decimal_places=1, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( + + format_value_display( value=self.prompt_tokens, label="Prompt", total_characters=12, @@ -335,7 +752,7 @@ def formatted_tokens_summary(self) -> str: decimal_places=0, ) + ", " - + BenchmarkerTaskProgressState.format_progress_display( + + format_value_display( value=self.output_tokens, label="Gen", total_characters=12, @@ -344,377 +761,216 @@ def formatted_tokens_summary(self) -> str: ) ) + @property + def formatted_scheduler_stats(self) -> str: + if self.benchmark_status == "pending": + return " " -BTPS = TypeVar("BTPS", bound=BenchmarkerTaskProgressState) - - -class BenchmarkerProgressDisplay(Generic[BTPS]): - def __init__(self, display_scheduler_stats: bool): - self.display_scheduler_stats = display_scheduler_stats - self.started = False - self.benchmarker_tasks_progress = Progress(*self.create_task_progress_columns()) - self.benchmarker_tasks_panel = Panel( - self.benchmarker_tasks_progress, - title="Benchmarks", - title_align="left", - expand=True, - ) - self.benchmarker_progress = Progress( - TextColumn("Generating...", style=f"italic {Colors.PROGRESS}"), - BarColumn( - bar_width=None, - complete_style=Colors.PROGRESS, - finished_style=Colors.SUCCESS, - ), - TextColumn( - "({task.fields[completed_benchmarks]}/{task.fields[total_benchmarks]})", - style=Colors.PROGRESS, - ), - TextColumn("["), - TimeElapsedColumn(), - TextColumn("<"), - TimeRemainingColumn(), - TextColumn("]"), - ) - self.benchmarker_live = Live( - Group( - self.benchmarker_tasks_panel, - self.benchmarker_progress, - ), - redirect_stdout=True, - redirect_stderr=True, - ) - self.active_task: Optional[TaskID] = None - self.benchmarker_tasks: list[BTPS] = [] - self.progress_task: Optional[TaskID] = None - - def update(self, result: BenchmarkerResult): - if result.type_ == "run_start": - if self.started: - raise RuntimeError("Progress display already started.") - - self.handle_start(result) - self.started = True - elif result.type_ == "run_complete": - if not self.started: - raise RuntimeError("Progress display not started.") - - 
self.handle_end(result) - self.started = False - else: - if not self.started: - raise RuntimeError("Progress display not started.") - - self.handle_update(result) - - def handle_start(self, result: BenchmarkerResult): - self.benchmarker_live.start() - - for index, strategy_type in enumerate(result.profile.strategy_types): - task_id = self.benchmarker_tasks_progress.add_task( - description=strategy_type, - start=False, - total=None, - completed=0, - visible=False, + return ( + f"[{Colors.info}]Sys:[/{Colors.info}] , " + + format_value_display( + value=self.request_targeted_start_delay, + label="Start Del", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, ) - task_progress_state = self.create_task_progress_state( - task_id=task_id, - index=index, - strategy_type=strategy_type, - result=result, + + format_value_display( + value=self.scheduler_overheads_time, + label="Sched OH", + units="ms", + total_characters=18, + digits_places=3, + decimal_places=1, ) - self.benchmarker_tasks.append(task_progress_state) - self.benchmarker_tasks_progress.update( - task_id, - description=task_progress_state.description, - visible=True, - **task_progress_state.fields, # type: ignore[arg-type] + + ", " + + format_value_display( + value=self.queued_time, + label="Queued", + units="ms", + total_characters=18, + digits_places=5, + decimal_places=0, ) - - self.progress_task = self.benchmarker_progress.add_task( - "", - total=len(self.benchmarker_tasks) * 1000, - completed_benchmarks=0, - total_benchmarks=len(self.benchmarker_tasks), ) - def handle_update(self, result: BenchmarkerResult): - current_state: BTPS = self.benchmarker_tasks[result.current_index] - - if result.type_ == "scheduler_start": - self.handle_update_scheduler_start(current_state, result) - self.active_task = current_state.task_id - elif result.type_ == "scheduler_update": - self.handle_update_scheduler_update(current_state, result) - elif result.type_ == "scheduler_complete": - self.handle_update_scheduler_complete(current_state, result) - elif result.type_ == "benchmark_compiled": - self.handle_update_benchmark_compiled(current_state, result) - else: - raise ValueError(f"Unknown result type: {result.type_}") + def start(self, strategy: SchedulingStrategy): + self.strategy = strategy + self.strategy_type = strategy.type_ - if self.progress_task is None: - raise RuntimeError("Progress task not set.") - - self.benchmarker_tasks_progress.update( - current_state.task_id, - description=current_state.description, - completed=current_state.completed, - total=current_state.total, - **current_state.fields, # type: ignore[arg-type] - ) - self.benchmarker_progress.update( - self.progress_task, - completed=(result.current_index * 1000) + current_state.completed, - total=1000 * len(self.benchmarker_tasks), - completed_benchmarks=( - result.current_index + (1 if current_state.ended else 0) + def update( + self, aggregator_update: AggregatorState, scheduler_state: SchedulerState + ): + self.progress = ( + (1.0 - scheduler_state.remaining_fraction) + if scheduler_state.remaining_fraction is not None + else 0.0 + ) + status: Literal["in_warmup", "in_progress", "in_cooldown"] | None = ( + "in_progress" # Need to handle requests_in_* isn't in aggregator_update + ) + if aggregator_update.get("requests_in_warmup"): + status = "in_warmup" + elif aggregator_update.get("requests_in_cooldown"): + status = "in_cooldown" + self._update_processing_states( + benchmark_status=status, + start_time=scheduler_state.start_time, + 
successful_requests=scheduler_state.successful_requests, + cancelled_requests=scheduler_state.cancelled_requests, + errored_requests=scheduler_state.errored_requests, + ) + self._update_request_stats( + request_concurrency=aggregator_update.get_metric( + key="requests", type_="avg", prefix="completed" + ), + requests_per_second=aggregator_update.get_metric( + key="requests", + type_="rate", + prefix="completed", + ), + request_latency=aggregator_update.get_metric( + key="request_latency", type_="avg", prefix="completed" ), - total_benchmarks=len(self.benchmarker_tasks), ) - - if current_state.ended: - self.benchmarker_tasks_progress.stop_task(current_state.task_id) - self.active_task = None - - def handle_update_scheduler_start( - self, progress_state: BTPS, result: BenchmarkerResult - ): - if self.active_task is not None: - raise RuntimeError("Active task already set.") - - progress_state.strategy = result.current_strategy # type: ignore[assignment] - progress_state.started = True - current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] - progress_state.start_time = ( - current_aggregator.requests_stats.totals.total.start_time + self._update_token_stats( + output_tokens=aggregator_update.get_metric( + key="output_tokens", type_="avg", prefix="completed" + ), + output_tokens_rate=aggregator_update.get_metric( + key="output_tokens", type_="rate" + ), + prompt_tokens=aggregator_update.get_metric( + key="prompt_tokens", type_="avg", prefix="completed" + ), + total_tokens_rate=aggregator_update.get_metric( + key="total_tokens", type_="rate" + ), + time_to_first_token=( + aggregator_update.get_metric(key="time_to_first_token", type_="avg") + ), + inter_token_latency=( + aggregator_update.get_metric(key="inter_token_latency", type_="avg") + ), ) - progress_state.max_number = current_aggregator.args.max_number - progress_state.max_duration = current_aggregator.args.max_duration - - def handle_update_scheduler_update( - self, progress_state: BTPS, result: BenchmarkerResult - ): - if self.active_task is None: - raise RuntimeError("Active task not set.") - - if self.active_task != progress_state.task_id: - raise RuntimeError("Active task does not match current task.") + if aggregator_update.get("updated_scheduler_stats"): + self._update_system_stats( + request_targeted_start_delay=( + aggregator_update.get_metric( + key="request_targeted_start_delay", type_="avg", default=0.0 + ) + ), + queued_time=( + aggregator_update.get_metric( + key="queued_time", type_="avg", default=0.0 + ) + ), + scheduler_overheads_time=0.0, # Need to add up metrics here + ) - current_aggregator: BenchmarkAggregator = result.current_aggregator # type: ignore[assignment] - progress_state.in_warmup = current_aggregator.in_warmup - progress_state.in_cooldown = current_aggregator.in_cooldown - progress_state.requests_rate = ( - current_aggregator.requests_stats.totals.successful.rate - ) - progress_state.request_latency = ( - current_aggregator.requests_stats.request_time.mean - ) - progress_state.requests_processing = ( - current_aggregator.scheduler_stats.processing_requests.last - ) - progress_state.requests_successful = ( - current_aggregator.requests_stats.totals.successful.total - ) - progress_state.requests_incomplete = ( - current_aggregator.requests_stats.totals.incomplete.total - ) - progress_state.requests_errored = ( - current_aggregator.requests_stats.totals.errored.total - ) - progress_state.worker_overheads_time_ms = ( - 
current_aggregator.requests_stats.scheduled_time_delay.mean_ms - + current_aggregator.requests_stats.worker_start_delay.mean_ms - ) - progress_state.backend_overheads_time_ms = ( - current_aggregator.requests_stats.request_time_delay.mean_ms - ) - progress_state.requests_sleep_time_ms = ( - current_aggregator.requests_stats.scheduled_time_sleep.mean_ms - ) - progress_state.requests_targeted_start_time_delay_ms = ( - current_aggregator.requests_stats.request_start_time_targeted_delay.mean_ms + def complete(self, benchmark: GenerativeBenchmark): + self._update_processing_states( + benchmark_status="completed", + start_time=benchmark.start_time, + successful_requests=benchmark.request_totals.successful, + cancelled_requests=benchmark.request_totals.incomplete, + errored_requests=benchmark.request_totals.errored, + ) + self._update_request_stats( + request_concurrency=benchmark.metrics.request_concurrency.successful.mean, + requests_per_second=benchmark.metrics.requests_per_second.successful.mean, + request_latency=benchmark.metrics.request_latency.successful.mean, + ) + self._update_token_stats( + output_tokens=benchmark.metrics.output_token_count.successful.mean, + output_tokens_rate=benchmark.metrics.output_tokens_per_second.successful.mean, + prompt_tokens=benchmark.metrics.prompt_token_count.successful.mean, + total_tokens_rate=benchmark.metrics.tokens_per_second.successful.mean, + time_to_first_token=( + benchmark.metrics.time_to_first_token_ms.successful.mean + ), + inter_token_latency=( + benchmark.metrics.inter_token_latency_ms.successful.mean + ), + converted=True, ) - def handle_update_scheduler_complete( + def _update_processing_states( self, - progress_state: BTPS, - result: BenchmarkerResult, # noqa: ARG002 + benchmark_status: Literal[ + "pending", "in_warmup", "in_progress", "in_cooldown", "completed" + ], + start_time: float | None = None, + successful_requests: int | None = None, + cancelled_requests: int | None = None, + errored_requests: int | None = None, ): - if self.active_task is None: - raise RuntimeError("Active task not set.") - - if self.active_task != progress_state.task_id: - raise RuntimeError("Active task does not match current task.") - - progress_state.in_warmup = False - progress_state.in_cooldown = False - progress_state.compiling = True - - def handle_update_benchmark_compiled( - self, progress_state: BTPS, result: BenchmarkerResult - ): - if self.active_task is None: - raise RuntimeError("Active task not set.") - - if self.active_task != progress_state.task_id: - raise RuntimeError("Active task does not match current task.") - - current_benchmark: Benchmark = result.current_benchmark # type: ignore[assignment] - progress_state.compiling = False - progress_state.ended = True - progress_state.requests_rate = ( - current_benchmark.metrics.requests_per_second.successful.mean - ) - progress_state.requests_processing = ( - current_benchmark.metrics.request_concurrency.successful.mean - ) - - def handle_end(self, result: BenchmarkerResult): # noqa: ARG002 - if self.progress_task is None: - raise RuntimeError("Progress task not set.") - - self.benchmarker_progress.update( - self.progress_task, - completed=len(self.benchmarker_tasks) * 1000, - total=len(self.benchmarker_tasks) * 1000, - completed_benchmarks=len(self.benchmarker_tasks), - total_benchmarks=len(self.benchmarker_tasks), - ) - self.benchmarker_progress.stop_task(self.progress_task) - self.benchmarker_live.stop() - self.active_task = None - self.benchmarker_tasks = [] - self.progress_task = None - - def 
create_task_progress_columns(self) -> list[ProgressColumn]: - columns = [ - TextColumn("[{task.fields[start_time]}]"), - SpinnerColumn(style=Colors.PROGRESS), - TaskProgressColumn(style=Colors.PROGRESS), - TextColumn("{task.description}"), - TextColumn("({task.fields[progress_status]})"), - TextColumn(" "), - ] - - if not self.display_scheduler_stats: - columns += [ - TextColumn("{task.fields[requests_summary]}\n"), - ] - else: - columns += [ - TextColumn( - "{task.fields[requests_summary]}\n{task.fields[scheduler_stats]}\n" - ), - ] - - return columns - - def create_task_progress_state( + if benchmark_status is not None: + self.benchmark_status = benchmark_status + if start_time is not None: + self.start_time = start_time + if successful_requests is not None: + self.successful_requests = successful_requests + if cancelled_requests is not None: + self.cancelled_requests = cancelled_requests + if errored_requests is not None: + self.errored_requests = errored_requests + + def _update_request_stats( self, - task_id: TaskID, - index: int, # noqa: ARG002 - strategy_type: StrategyType, - result: BenchmarkerResult, # noqa: ARG002 - ) -> BTPS: - return BenchmarkerTaskProgressState( # type: ignore[return-value] - display_scheduler_stats=self.display_scheduler_stats, - task_id=task_id, - strategy=strategy_type, - ) - - -class GenerativeTextBenchmarkerProgressDisplay( - BenchmarkerProgressDisplay[GenerativeTextBenchmarkerTaskProgressState] -): - def handle_update_scheduler_update( - self, - progress_state: GenerativeTextBenchmarkerTaskProgressState, - result: BenchmarkerResult, + request_concurrency: int | None = None, + requests_per_second: float | None = None, + request_latency: float | None = None, ): - super().handle_update_scheduler_update(progress_state, result) - current_aggregator: GenerativeBenchmarkAggregator = result.current_aggregator # type: ignore[assignment] - progress_state.output_tokens = ( - current_aggregator.requests_stats.output_tokens.mean - ) - progress_state.prompt_tokens = ( - current_aggregator.requests_stats.prompt_tokens.mean - ) - progress_state.output_tokens_rate = ( - current_aggregator.requests_stats.output_tokens.rate - ) - progress_state.total_tokens_rate = ( - current_aggregator.requests_stats.total_tokens.rate - ) - progress_state.tokens_ttft = ( - current_aggregator.requests_stats.time_to_first_token.mean_ms - ) - progress_state.tokens_itl = ( - current_aggregator.requests_stats.inter_token_latency.mean_ms - ) - - def handle_update_benchmark_compiled( + if request_concurrency is not None: + self.request_concurrency = request_concurrency + if requests_per_second is not None: + self.requests_per_second = requests_per_second + if request_latency is not None: + self.request_latency = request_latency + + def _update_token_stats( self, - progress_state: GenerativeTextBenchmarkerTaskProgressState, - result: BenchmarkerResult, + output_tokens: int | None = None, + output_tokens_rate: float | None = None, + prompt_tokens: int | None = None, + total_tokens_rate: float | None = None, + time_to_first_token: float | None = None, + inter_token_latency: float | None = None, + converted: bool = False, ): - super().handle_update_benchmark_compiled(progress_state, result) - - current_benchmark: GenerativeBenchmark = result.current_benchmark # type: ignore[assignment] - progress_state.request_latency = ( - current_benchmark.metrics.request_latency.successful.mean - ) - progress_state.requests_successful = current_benchmark.request_totals.successful - progress_state.requests_errored 
= current_benchmark.request_totals.errored - progress_state.requests_incomplete = current_benchmark.request_totals.incomplete - progress_state.output_tokens = ( - current_benchmark.metrics.output_token_count.successful.mean - ) - progress_state.prompt_tokens = ( - current_benchmark.metrics.prompt_token_count.successful.mean - ) - progress_state.output_tokens_rate = ( - current_benchmark.metrics.output_tokens_per_second.successful.mean - ) - progress_state.total_tokens_rate = ( - current_benchmark.metrics.tokens_per_second.successful.mean - ) - progress_state.tokens_ttft = ( - current_benchmark.metrics.time_to_first_token_ms.successful.mean - ) - progress_state.tokens_itl = ( - current_benchmark.metrics.inter_token_latency_ms.successful.mean - ) + if output_tokens is not None: + self.output_tokens = output_tokens + if output_tokens_rate is not None: + self.output_tokens_rate = output_tokens_rate + if prompt_tokens is not None: + self.prompt_tokens = prompt_tokens + if total_tokens_rate is not None: + self.total_tokens_rate = total_tokens_rate + if time_to_first_token is not None: + self.time_to_first_token = time_to_first_token * ( + 1000 if not converted else 1 + ) + if inter_token_latency is not None: + self.inter_token_latency = inter_token_latency * ( + 1000 if not converted else 1 + ) - def create_task_progress_state( + def _update_system_stats( self, - task_id: TaskID, - index: int, # noqa: ARG002 - strategy_type: StrategyType, - result: BenchmarkerResult, # noqa: ARG002 - ) -> GenerativeTextBenchmarkerTaskProgressState: - return GenerativeTextBenchmarkerTaskProgressState( - display_scheduler_stats=self.display_scheduler_stats, - task_id=task_id, - strategy=strategy_type, - ) - - def create_task_progress_columns(self) -> list[ProgressColumn]: - columns = super().create_task_progress_columns() - columns = columns[:-1] # remove the last display info column - - if not self.display_scheduler_stats: - columns += [ - TextColumn( - "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}", - ), - ] - else: - columns += [ - TextColumn( - "{task.fields[requests_summary]}\n{task.fields[tokens_summary]}\n{task.fields[scheduler_stats]}", - ), - ] - - return columns + request_targeted_start_delay: float | None = None, + queued_time: float | None = None, + scheduler_overheads_time: float | None = None, + converted: bool = False, + ): + if request_targeted_start_delay is not None: + self.request_targeted_start_delay = request_targeted_start_delay * ( + 1000 if not converted else 1 + ) + if queued_time is not None: + self.queued_time = queued_time * (1000 if not converted else 1) + if scheduler_overheads_time is not None: + self.scheduler_overheads_time = scheduler_overheads_time * ( + 1000 if not converted else 1 + ) diff --git a/src/guidellm/benchmark/scenario.py b/src/guidellm/benchmark/scenario.py index 042b25b1..15e3cd81 100644 --- a/src/guidellm/benchmark/scenario.py +++ b/src/guidellm/benchmark/scenario.py @@ -1,7 +1,9 @@ +from __future__ import annotations + from collections.abc import Iterable from functools import cache from pathlib import Path -from typing import Annotated, Any, Literal, Optional, TypeVar, Union +from typing import Annotated, Any, Literal, TypeVar from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt @@ -11,8 +13,8 @@ from guidellm.backend.backend import BackendType from guidellm.benchmark.profile import ProfileType -from guidellm.objects.pydantic import 
StandardBaseModel -from guidellm.scheduler.strategies import StrategyType +from guidellm.scheduler.strategy import StrategyType +from guidellm.utils import StandardBaseModel __ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"] @@ -25,7 +27,7 @@ def get_builtin_scenarios() -> list[str]: return [p.stem for p in SCENARIO_DIR.glob("*.json")] -def parse_float_list(value: Union[str, float, list[float]]) -> list[float]: +def parse_float_list(value: str | float | list[float]) -> list[float]: """ Parse a comma separated string to a list of float or convert single float list of one or pass float @@ -57,7 +59,7 @@ class Scenario(StandardBaseModel): target: str @classmethod - def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T: + def from_builtin(cls: type[T], name: str, overrides: dict | None = None) -> T: filename = SCENARIO_DIR / f"{name}.json" if not filename.is_file(): @@ -77,28 +79,28 @@ class Config: arbitrary_types_allowed = True backend_type: BackendType = "openai_http" - backend_args: Optional[dict[str, Any]] = None - model: Optional[str] = None - processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None - processor_args: Optional[dict[str, Any]] = None - data: Union[ - str, - Path, - Iterable[Union[str, dict[str, Any]]], - Dataset, - DatasetDict, - IterableDataset, - IterableDatasetDict, - ] - data_args: Optional[dict[str, Any]] = None - data_sampler: Optional[Literal["random"]] = None - rate_type: Union[StrategyType, ProfileType] - rate: Annotated[ - Optional[list[PositiveFloat]], BeforeValidator(parse_float_list) - ] = None - max_seconds: Optional[PositiveFloat] = None - max_requests: Optional[PositiveInt] = None - warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None - cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None - output_sampling: Optional[NonNegativeInt] = None + backend_args: dict[str, Any] | None = None + model: str | None = None + processor: str | Path | PreTrainedTokenizerBase | None = None + processor_args: dict[str, Any] | None = None + data: ( + str + | Path + | Iterable[str | dict[str, Any]] + | Dataset + | DatasetDict + | IterableDataset + | IterableDatasetDict + ) + data_args: dict[str, Any] | None = None + data_sampler: Literal["random"] | None = None + rate_type: StrategyType | ProfileType + rate: Annotated[list[PositiveFloat] | None, BeforeValidator(parse_float_list)] = ( + None + ) + max_seconds: PositiveFloat | None = None + max_requests: PositiveInt | None = None + warmup_percent: Annotated[float | None, Field(gt=0, le=1)] = None + cooldown_percent: Annotated[float | None, Field(gt=0, le=1)] = None + output_sampling: NonNegativeInt | None = None random_seed: int = 42
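
Note on the new progress accounting: _GenerativeProgressTaskState.completed and .total map the scheduler's remaining_fraction onto a fixed number of progress units (_PROGRESS_SCALE, defined outside this hunk; the removed display code hard-coded 1000 for the same purpose). A minimal standalone sketch of that mapping, assuming a scale of 1000:

    from __future__ import annotations

    # Sketch only: mirrors the completed/total math in _GenerativeProgressTaskState.
    # _PROGRESS_SCALE is assumed to be 1000, the value the removed code hard-coded.
    _PROGRESS_SCALE = 1000

    def completed_units(remaining_fraction: float | None, status: str) -> float:
        # update() stores progress = 1 - remaining_fraction (0.0 when unknown);
        # completed pins "pending" to 0 and "completed" to the full scale.
        if status == "pending":
            return 0.0
        if status == "completed":
            return float(_PROGRESS_SCALE)
        progress = 1.0 - remaining_fraction if remaining_fraction is not None else 0.0
        return progress * _PROGRESS_SCALE

    # e.g. 25% of the run remaining -> 750 of 1000 units on the task's progress bar
    assert completed_units(0.25, "in_progress") == 750.0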
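
Note on the scenario changes: rate stays flexible at the config boundary because BeforeValidator(parse_float_list) runs before Pydantic validates the field, so a comma-separated string, a bare number, or a list all normalize to list[float]. A hedged usage sketch, assuming parse_float_list accepts comma-separated strings as its docstring describes; the target URL, data path, and rate_type literal below are placeholders, not values taken from this diff:

    from guidellm.benchmark.scenario import GenerativeTextScenario

    # Placeholder values for illustration; only the rate handling is the point here.
    scenario = GenerativeTextScenario(
        target="http://localhost:8000",   # placeholder endpoint
        data="prompts.jsonl",             # placeholder dataset path
        rate_type="constant",             # assumed StrategyType literal; swap in any valid value
        rate="0.5,1,2",                   # normalized by parse_float_list
    )
    assert scenario.rate == [0.5, 1.0, 2.0]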