vllm-project
diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py
Lines changed: 3 additions & 1 deletion
diff --git a/src/guidellm/benchmark/schemas.py b/src/guidellm/benchmark/schemas.py
Lines changed: 162 additions & 74 deletions
diff --git a/src/guidellm/data/loaders.py b/src/guidellm/data/loaders.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/guidellm/scheduler/__init__.py b/src/guidellm/scheduler/__init__.py
Lines changed: 12 additions & 5 deletions
diff --git a/src/guidellm/scheduler/environments.py b/src/guidellm/scheduler/environments.py
Lines changed: 26 additions & 46 deletions
diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py
Lines changed: 24 additions & 25 deletions
@@ -109,16 +109,18 @@ async def run(
                 )
                 estimated_state = EstimatedBenchmarkState()
                 scheduler_state = None
+                scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
 
                 async for (
                     response,
                     request,
                     request_info,
                     scheduler_state,
-                ) in Scheduler[RequestT, ResponseT]().run(
+                ) in scheduler.run(
                     requests=requests,
                     backend=backend,
                     strategy=strategy,
+                    startup_duration=warmup if warmup and warmup >= 1 else 0.0,
                     env=environment,
                     **constraints or {},
                 ):
 
@@ -65,7 +65,7 @@ def __iter__(self):
         worker_modulus = worker_info.num_workers if worker_info is not None else 1
         worker_index = worker_info.id if worker_info is not None else 0
 
-        if self.precache is not None:
+        if self.precache:
             for index, item in enumerate(self.precache):
                 if (index + worker_index) % worker_modulus == 0:
                     yield item
 
@@ -1,3 +1,15 @@
+"""
+Scheduler subsystem for orchestrating benchmark workloads and managing worker processes.
+
+This module provides the core scheduling infrastructure for guidellm, including
+strategies for controlling request timing patterns (synchronous, asynchronous,
+constant rate, Poisson), constraints for limiting benchmark execution (duration,
+error rates, request counts), and distributed execution through worker processes.
+The scheduler coordinates between backend interfaces, manages benchmark state
+transitions, and handles multi-turn request sequences with customizable timing
+strategies and resource constraints.
+"""
+
 from .constraints import (
     Constraint,
     ConstraintInitializer,
@@ -28,11 +40,6 @@
     AsyncConstantStrategy,
     AsyncPoissonStrategy,
     ConcurrentStrategy,
-    ConstantRateRequestTimings,
-    LastCompletionRequestTimings,
-    NoDelayRequestTimings,
-    PoissonRateRequestTimings,
-    ScheduledRequestTimings,
     SchedulingStrategy,
     StrategyT,
     StrategyType,
 
@@ -1,18 +1,19 @@
 """
 Environment abstractions for coordinating scheduler execution across distributed nodes.
 
-Provides environment abstractions that handle synchronization, timing coordination,
-error propagation, and lifecycle management for scheduler execution across single
-or multiple nodes. The Environment protocol defines the interface for distributed
+Provides abstractions that handle synchronization, timing coordination, error
+propagation, and lifecycle management for scheduler execution across single or
+multiple nodes. The Environment protocol defines the interface for distributed
 coordination while NonDistributedEnvironment provides a minimal implementation
-for single-node execution.
+for single-node execution. Environments manage the complete execution lifecycle
+from parameter distribution through result aggregation.
 
-Environment Execution Flow:
-1. sync_run_params() - Distribute workload and synchronize parameters across nodes
-2. sync_run_start() - Coordinate synchronized start time for all nodes
-3. update_run_iteration() - Update state after each request (called per iteration)
+Execution Flow:
+1. sync_run_params() - Distribute workload and synchronize parameters
+2. sync_run_start() - Coordinate synchronized start time
+3. update_run_iteration() - Update state after each request iteration
 4. sync_run_error() - Handle and propagate errors across nodes
-5. sync_run_end() - Aggregate results and cleanup at completion
+5. sync_run_end() - Aggregate results and finalize execution
 """
 
 from __future__ import annotations
@@ -39,12 +40,12 @@
 
 class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
     """
-    Abstract base for coordinating scheduler execution across distributed nodes.
+    Abstract interface for coordinating scheduler execution across distributed nodes.
 
-    Defines the interface for managing distributed scheduler execution including
+    Defines the protocol for managing distributed scheduler execution including
     parameter synchronization, timing coordination, state updates, error propagation,
-    and result aggregation. Implementations handle the complexity of distributed
-    coordination while providing a unified interface for scheduler orchestration.
+    and result aggregation. Implementations handle distributed coordination complexity
+    while providing a unified interface for scheduler orchestration.
     """
 
     @abstractmethod
@@ -61,10 +62,6 @@ async def sync_run_params(
         """
         Synchronize execution parameters across nodes and resolve local scope.
 
-        Coordinates parameter distribution and validation across active nodes.
-        In distributed environments, handles node assignment and workload partitioning.
-        In non-distributed environments, typically returns parameters unchanged.
-
         :param requests: Complete set of requests to process across all nodes
         :param strategy: Scheduling strategy to apply during execution
         :param constraints: Runtime constraints to enforce during execution
@@ -78,9 +75,6 @@ async def sync_run_start(self) -> float:
         """
         Coordinate synchronized start time across all nodes.
 
-        Ensures all nodes begin processing simultaneously for accurate benchmarking
-        and consistent timing measurements across distributed execution.
-
         :return: Unix timestamp when all nodes should begin processing
         :raises Exception: If startup synchronization fails across nodes
         """
@@ -97,11 +91,6 @@ async def update_run_iteration(
         """
         Update environment state with completed request iteration results.
 
-        Called after each request processing to update execution progress and
-        synchronize any required state across nodes in distributed environments.
-        Generally, distributed is expected to store the iteration updates until
-        all nodes have processed and sync_run_end is called to retrieve them.
-
         :param response: Response generated for the request, if successful
         :param request: The processed request
         :param request_info: Metadata about request processing including timings
@@ -115,9 +104,6 @@ async def sync_run_error(self, err: list[Exception] | Exception):
         """
         Handle and propagate errors across all active nodes.
 
-        Coordinates error handling when failures occur, ensuring all nodes are
-        notified for appropriate cleanup or shutdown procedures.
-
         :param err: The exception(s) that occurred during execution
         """
         ...
@@ -136,10 +122,6 @@ async def sync_run_end(
         """
         Finalize execution and aggregate results from all nodes.
 
-        Handles cleanup, result synchronization, and error propagation at execution
-        completion. Collects and yields results from worker nodes in distributed
-        environments.
-
         :return: Iterator of (response, request, request_info, state) tuples from
             remote nodes in distributed environments, empty for non-distributed
         :raises Exception: Any errors that occurred during execution
@@ -151,9 +133,9 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
     """
     Single-node scheduler execution environment with minimal coordination overhead.
 
-    Simplified environment for running schedulers on a single node without distributed
-    coordination requirements. Implements the Environment interface with no-op
-    synchronization for local testing, development, and single-machine benchmarking.
+    Implements the Environment interface with no-op synchronization for local testing,
+    development, and single-machine benchmarking. All synchronization methods return
+    immediately without distributed coordination logic.
 
     Example:
     ::
@@ -165,29 +147,27 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
             SynchronousStrategy,
         )
 
-
-        # Definitions
+        env = NonDistributedEnvironment()
         requests = [f"req_{ind}" for ind in range(5)]
         strategy = SynchronousStrategy()
         constraints = {"max_num": MaxNumberConstraint(max_num=5)}
         state = SchedulerState()
 
-        # Run environment
         local_req, local_strat, local_const = await env.sync_run_params(
             requests, strategy, constraints
         )
         start_time = await env.sync_run_start()
         for req in local_req:
             state.processed_requests += 1
-            await env.update_run_iteration(
-                f"resp_{req}", req, RequestInfo(), state
-            )
+            await env.update_run_iteration(f"resp_{req}", req, RequestInfo(), state)
         async for nonlocal_req in env.sync_run_end():
             state.processed_requests += 1
     """
 
     def __init__(self):
-        """Initialize with empty error storage for single-node execution."""
+        """
+        Initialize single-node environment with empty error storage.
+        """
         self.run_errors: list[Exception] = []
 
     async def sync_run_params(
@@ -206,15 +186,15 @@ async def sync_run_params(
         :param requests: Requests to process locally
         :param strategy: Scheduling strategy to apply during execution
         :param constraints: Runtime constraints to enforce during execution
-        :return: Tuple containing the original (requests, strategy, constraints)
+        :return: Original (requests, strategy, constraints) tuple unchanged
         """
         return requests, strategy, constraints
 
     async def sync_run_start(self) -> float:
         """
         Return current time plus configured delay for single-node startup.
 
-        :return: Unix timestamp for when the run should start
+        :return: Unix timestamp when execution should begin
         """
         return time.time() + settings.scheduler_start_delay_non_distributed
 
@@ -229,7 +209,7 @@ async def update_run_iteration(
         No-op for single-node execution with no distributed state synchronization.
 
         :param response: Response generated for the request, if successful
-        :param request: The request that was processed
+        :param request: The processed request
         :param request_info: Metadata about request processing including timings
         :param state: Current scheduler state with metrics and progress
         """
@@ -256,7 +236,7 @@ async def sync_run_end(
         """
         Finalize single-node execution and propagate any stored errors.
 
-        :return: Empty iterator since there are no remote nodes
+        :return: Empty iterator as there are no remote nodes
         :raises Exception: Any error stored during execution via sync_run_error
         """
         if self.run_errors:
 
@@ -1,11 +1,10 @@
 """
-Thread-safe singleton scheduler for distributed load generation workload coordination.
+Thread-safe singleton scheduler for distributed benchmarking workload coordination.
 
-Provides the core orchestration engine that coordinates request processing across
-worker processes and distributed environments. Manages timing synchronization,
-resource allocation, constraint enforcement, and result aggregation for
-load generation operations. Integrates with backends, environments, and strategies
-to enable scalable load testing across various scenarios including LLM inference.
+Orchestrates request processing across worker processes with distributed timing
+coordination, constraint enforcement, and result aggregation. Integrates with
+backends, environments, and strategies to enable scalable load testing across
+various scenarios including LLM inference benchmarking.
 """
 
 from __future__ import annotations
@@ -38,16 +37,14 @@ class Scheduler(
     Thread-safe singleton scheduler for distributed benchmarking workload coordination.
 
     Orchestrates request processing across worker processes with distributed timing
-    coordination, constraint enforcement, and result aggregation. Provides a unified
-    interface for executing benchmarking operations while abstracting the complexity
-    of multi-process coordination, environment synchronization, and resource management.
-    Implements singleton pattern to ensure consistent execution state across concurrent
-    benchmark operations.
+    coordination, constraint enforcement, and result aggregation. Abstracts the
+    complexity of multi-process coordination, environment synchronization, and
+    resource management while providing a unified interface for executing benchmarking
+    operations. Implements the singleton pattern to ensure consistent execution state.
 
     Example:
     ::
         from guidellm.scheduler import Scheduler
-        from guidellm.backends import OpenAIBackend
         from guidellm.scheduler import NonDistributedEnvironment, SynchronousStrategy
 
         scheduler = Scheduler()
@@ -58,14 +55,15 @@ class Scheduler(
             env=NonDistributedEnvironment(),
             max_requests=1000
         ):
-            print(f"Processed: {request} with info: {info} and response: {response}")
+            print(f"Processed: {request}")
     """
 
     async def run(
         self,
         requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
         backend: BackendInterface[RequestT, ResponseT],
         strategy: SchedulingStrategy,
+        startup_duration: float,
         env: Environment[RequestT, ResponseT] | None,
         **constraints: Any | dict[str, Any] | Constraint,
     ) -> AsyncIterator[
@@ -80,22 +78,23 @@ async def run(
         Execute distributed request processing with coordinated timing and constraints.
 
         Orchestrates the complete benchmarking workflow across worker processes with
-        environment synchronization, constraint enforcement, and error handling.
-        Manages resource lifecycle from initialization through cleanup while yielding
-        real-time processing updates for monitoring and aggregation.
+        environment synchronization, constraint enforcement, and error handling. Manages
+        resource lifecycle from initialization through cleanup while yielding real-time
+        processing updates for monitoring and aggregation.
 
-        :param requests: Request collection to process. Supports single requests or
+        :param requests: Request collection to process, supporting single requests or
             multi-turn sequences with optional inter-request delays
         :param backend: Backend interface for request processing and response generation
         :param strategy: Scheduling strategy controlling request timing and distribution
+        :param startup_duration: Duration in seconds for requests to ramp up
         :param env: Environment interface for distributed coordination and
-            synchronization
+            synchronization. Defaults to NonDistributedEnvironment if None
         :param constraints: Runtime constraints for execution control (max_requests,
-            max_duration, max_error_rate, etc.). Values can be primitives, dictionaries,
-            or constraint instances
-        :yields: Requests udpates as (response, request, request_info, scheduler_state)
-        tuples. Each request will generate three ordered updates:
-            queued, in_progress, completed | errored | cancelled.
+            max_duration, max_error_rate, etc.) as primitives, dictionaries, or
+            constraint instances
+        :yields: Request updates as (response, request, request_info, scheduler_state)
+            tuples. Each request generates three ordered updates: queued, in_progress,
+            completed | errored | cancelled
         :raises Exception: Worker process errors, environment synchronization failures,
             or constraint evaluation errors are propagated after cleanup
         """
@@ -122,10 +121,10 @@ async def run(
                 # Setup the worker group, sync start with the environment
                 worker_group = WorkerProcessGroup[RequestT, ResponseT](
                     requests=local_requests,
-                    cycle_requests=local_requests,
                     backend=backend,
                     strategy=local_strategy,
-                    constraints=local_constraints,
+                    startup_duration=startup_duration,
+                    **local_constraints,
                 )
                 await worker_group.create_processes()
                 local_start_time = await env.sync_run_start()