vllm-project
diff --git a/src/guidellm/scheduler/environment.py b/src/guidellm/scheduler/environment.py
Lines changed: 17 additions & 10 deletions
diff --git a/src/guidellm/scheduler/objects.py b/src/guidellm/scheduler/objects.py
Lines changed: 5 additions & 2 deletions
diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py
Lines changed: 4 additions & 7 deletions
diff --git a/src/guidellm/scheduler/worker.py b/src/guidellm/scheduler/worker.py
Lines changed: 12 additions & 8 deletions
diff --git a/src/guidellm/scheduler/worker_group.py b/src/guidellm/scheduler/worker_group.py
Lines changed: 6 additions & 5 deletions
@@ -29,11 +29,12 @@
     SchedulerState,
 )
 from guidellm.scheduler.strategy import SchedulingStrategy
+from guidellm.utils import InfoMixin
 
 __all__ = ["Environment", "NonDistributedEnvironment"]
 
 
-class Environment(ABC, Generic[RequestT, ResponseT]):
+class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
     """
     Abstract base for scheduler execution environments.
 
@@ -86,6 +87,7 @@ async def update_run_iteration(
         response: ResponseT | None,
         request: RequestT,
         request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
+        state: SchedulerState,
     ):
         """
         Update environment state with completed request iteration.
@@ -101,7 +103,7 @@ async def update_run_iteration(
         ...
 
     @abstractmethod
-    async def sync_run_error(self, err: Exception):
+    async def sync_run_error(self, err: list[Exception] | Exception):
         """
         Handle and propagate errors across all nodes.
 
@@ -144,13 +146,11 @@ class NonDistributedEnvironment(Environment):
     distributed coordination. Implements the Environment interface with minimal
     synchronization overhead for local testing, development, and single-machine
     benchmarking.
-
-    :ivar run_err: Exception that occurred during execution, if any.
     """
 
     def __init__(self):
         """Initialize with no stored errors."""
-        self.run_err: Exception = None
+        self.run_errors: list[Exception] = []
 
     async def sync_run_params(
         self,
@@ -181,6 +181,7 @@ async def update_run_iteration(
         response: ResponseT | None,
         request: RequestT,
         request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
+        state: SchedulerState,
     ):
         """
         No-op for single-node execution.
@@ -196,7 +197,8 @@ async def sync_run_error(self, err: Exception):
 
         :param err: The exception that occurred during execution.
         """
-        self.run_err = err
+        err = [err] if not isinstance(err, list) else err
+        self.run_errors.extend(err)
 
     async def sync_run_end(
         self,
@@ -214,8 +216,13 @@ async def sync_run_end(
         :return: Empty iterator since there are no remote nodes.
         :raises Exception: Any error stored during execution via sync_run_error.
         """
-        if self.run_err:
-            raise self.run_err
-        # Return empty async iterator for non-distributed environment
+        if self.run_errors:
+            if len(self.run_errors) == 1:
+                raise self.run_errors[0]
+            else:
+                raise RuntimeError(
+                    f"Errors occurred during execution: {self.run_errors}"
+                )
+
         return
-        yield
+        yield  # needed to force generator compilation
@@ -22,7 +22,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator, Iterable
+from collections.abc import AsyncIterator
 from typing import (
     Any,
     Generic,
@@ -54,7 +54,10 @@
 RequestT = TypeVar("RequestT")
 MultiTurnRequestT = TypeAliasType(
     "MultiTurnRequestT",
-    Iterable[Union[RequestT, tuple[RequestT, float]]],
+    Union[
+        list[Union[RequestT, tuple[RequestT, float]]],
+        tuple[Union[RequestT, tuple[RequestT, float]], ...],  # variable-length tuple
+    ],
     type_params=(RequestT,),
 )
 ResponseT = TypeVar("ResponseT")
 
@@ -109,10 +109,7 @@ async def run(
         """
         with self.thread_lock:
             worker_group: (
-                WorkerProcessGroup[
-                    BackendT, RequestT, MeasuredRequestTimingsT, ResponseT
-                ]
-                | None
+                WorkerProcessGroup[RequestT, MeasuredRequestTimingsT, ResponseT] | None
             ) = None
 
             # Any issues during the run will raise an error (local or remote),
@@ -131,7 +128,7 @@ async def run(
 
                 # Setup the worker group, sync start with the environment
                 worker_group = WorkerProcessGroup[
-                    BackendT, RequestT, MeasuredRequestTimingsT, ResponseT
+                    RequestT, MeasuredRequestTimingsT, ResponseT
                 ](
                     backend=backend,
                     requests=local_requests,
@@ -154,13 +151,13 @@ async def run(
                     )
                     yield response, request, request_info, state
             except Exception as err:  # noqa: BLE001
-                env.sync_run_error(err)
+                await env.sync_run_error(err)
             finally:
                 # Ensure all worker processes are cleaned up for error or completion
                 if worker_group is not None:
                     err = await worker_group.shutdown()
                     if err is not None:
-                        env.sync_run_error(err)
+                        await env.sync_run_error(err)
 
             # Ensure any errors are raised and all responses
             # are yielded for aggregation on the primary node
 
@@ -12,7 +12,7 @@
 
 import asyncio
 import time
-from collections.abc import Generator, Iterable
+from collections.abc import Generator
 from multiprocessing import Queue
 from multiprocessing.synchronize import Barrier as ProcessingBarrier
 from multiprocessing.synchronize import Event as ProcessingEvent
@@ -112,6 +112,7 @@ def __init__(
             ]
         ] = None
         self.requests_canceled: ThreadingEvent = None
+        self.pull_requests_stopped: ThreadingEvent = None
         self.pull_task: asyncio.Task = None
         self.push_task: asyncio.Task = None
 
@@ -243,6 +244,7 @@ async def _initialize_requests_processing(self):
         )
         self.pending_updates_queue = culsans.Queue()
         self.requests_canceled = ThreadingEvent()
+        self.pull_requests_stopped = ThreadingEvent()
 
         # Start background tasks for queue management
         self.pull_task = asyncio.create_task(
@@ -351,7 +353,7 @@ async def _process_next_request(self):
                 request_info=request_info,
             )
 
-            if isinstance(request, Iterable) and not isinstance(request, (str, bytes)):
+            if isinstance(request, (list, tuple)):
                 raise NotImplementedError("Multi-turn requests are not yet supported")
 
             # Calculate when to start processing request
@@ -373,9 +375,8 @@ async def _process_next_request(self):
                 request=request,
                 request_info=request_info,
             )
-            async for resp, info in self.backend.resolve(request, request_info, None):
+            async for resp in self.backend.resolve(request, request_info, None):
                 response = resp
-                request_info = info
 
             # Complete
             request_info.scheduler_timings.resolve_end = time.time()
@@ -460,7 +461,6 @@ async def _handle_request_update(
 
     async def _cancel_pending_requests(self):
         while True:
-            # All requests will be on the queue by now, loop until we can't get anymore
             try:
                 request, request_info = await asyncio.wait_for(
                     self.pending_requests_queue.async_get(), timeout=self.poll_intervals
@@ -474,7 +474,9 @@ async def _cancel_pending_requests(self):
                     request_info=request_info,
                 )
             except (culsans.QueueEmpty, asyncio.TimeoutError):
-                break
+                if self.pull_requests_stopped.is_set():
+                    # No more requests will be put on the Queue
+                    break
 
     def _pull_requests_generator(self) -> Generator:
         last_check = time.time()
@@ -491,14 +493,16 @@ def _pull_requests_generator(self) -> Generator:
                 pass  # No update available, continue polling
             except culsans.QueueShutDown:
                 break
-            except Exception:  # noqa: BLE001
+            except Exception:  # noqa: BLE001, S110
                 pass
 
             if time.time() - last_check > self.poll_intervals:
                 # Yield to allow cancel/error/stop checks in wrapper
                 last_check = time.time()
                 yield None
 
+        self.pull_requests_stopped.set()
+
     def _push_updates_generator(self) -> Generator:
         last_check = time.time()
 
@@ -514,7 +518,7 @@ def _push_updates_generator(self) -> Generator:
                 pass  # No update available, continue polling
             except culsans.QueueShutDown:
                 break
-            except Exception:  # noqa: BLE001
+            except Exception:  # noqa: BLE001, S110
                 pass
 
             if time.time() - last_check > self.poll_intervals:
 
@@ -367,6 +367,7 @@ def _update_state(
                     "completed, errored, cancelled."
                 )
 
+            state.end_time = time.time()  # Always update for last time update received
             actions = {
                 name: const(state, info) for name, const in self.constraints.items()
             }
@@ -465,11 +466,6 @@ def _populate_requests_create_iterator(
                 else self.requests
             )
 
-        if self.infinite_requests is not False and isinstance(self.requests, Iterable):
-            # Out of requests and infinite set to True or set to default
-            # Create new iterator out of the Iterable
-            return iter(self.requests)
-
         if self.infinite_requests is True and isinstance(self.requests, Iterator):
             # Out of requests and infinite set to True, but request_iter is Iterator
             # Cannot create new, raise RuntimeError
@@ -478,6 +474,11 @@ def _populate_requests_create_iterator(
                 "infinite_requests is set to True"
             )
 
+        if self.infinite_requests is not False and isinstance(self.requests, Iterable):
+            # Out of requests and infinite set to True or set to default
+            # Create new iterator out of the Iterable
+            return iter(self.requests)
+
         # Either infinite is False for Iterable or Iterator
         # or infinite is None (default) for Iterator
         # So, return None to stop