vllm-project
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/guidellm/backend/interface.py‎
Lines changed: 88 additions & 0 deletions b/‎src/guidellm/backend/interface.py‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎src/guidellm/config.py‎
Lines changed: 1 addition & 0 deletions b/‎src/guidellm/config.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/guidellm/scheduler/__init__.py‎
Lines changed: 3 additions & 4 deletions b/‎src/guidellm/scheduler/__init__.py‎
Lines changed: 3 additions & 4 deletions
@@ -44,10 +44,12 @@ keywords = [
 ]
 dependencies = [
     "click>=8.0.0,<8.2.0",
+    "culsans~=0.9.0",
     "datasets",
     "ftfy>=6.0.0",
     "httpx[http2]<1.0.0",
     "loguru",
+    "msgpack",
     "numpy",
     "pillow",
     "protobuf",
 
@@ -0,0 +1,88 @@
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator
+from typing import (
+    Any,
+    Generic,
+    Literal,
+    Optional,
+    TypeVar,
+)
+
+from pydantic import Field
+
+from guidellm.objects import StandardBaseModel
+from guidellm.scheduler import (
+    RequestT,
+    RequestTimingsT,
+    ResponseT,
+    ScheduledRequestInfo,
+)
+
+
+class BackendInterface(ABC, Generic[RequestT, RequestTimingsT, ResponseT]):
+    """
+    Abstract interface for request processing backends. Note: before process_startup
+    is invoked, the implementation must ensure all properties are pickleable.
+    """
+
+    @property
+    @abstractmethod
+    def processes_limit(self) -> Optional[int]:
+        """Maximum worker processes supported, or None if unlimited."""
+
+    @property
+    @abstractmethod
+    def requests_limit(self) -> Optional[int]:
+        """Maximum concurrent requests supported, or None if unlimited."""
+
+    @abstractmethod
+    def info(self) -> dict[str, Any]:
+        """
+        :return: Backend metadata including the model and any initialization and
+            configuration information.
+        """
+        ...
+
+    @abstractmethod
+    async def process_startup(self) -> None:
+        """
+        Perform backend initialization and startup procedures.
+
+        :raises: Implementation-specific exceptions for startup failures.
+        """
+
+    @abstractmethod
+    async def validate(self) -> None:
+        """
+        Validate backend configuration and operational status.
+
+        :raises: Implementation-specific exceptions for validation failures.
+        """
+
+    @abstractmethod
+    async def process_shutdown(self) -> None:
+        """
+        Perform backend cleanup and shutdown procedures.
+
+        :raises: Implementation-specific exceptions for shutdown failures.
+        """
+
+    @abstractmethod
+    async def resolve(
+        self,
+        request: RequestT,
+        request_info: ScheduledRequestInfo[RequestTimingsT],
+        history: Optional[list[tuple[RequestT, ResponseT]]] = None,
+    ) -> AsyncIterator[tuple[ResponseT, ScheduledRequestInfo[RequestTimingsT]]]:
+        """
+        Process a request and yield incremental response updates.
+
+        :param request: The request object to process.
+        :param request_info: Scheduling metadata and timing information.
+        :param history: Optional conversation history for multi-turn requests.
+        :yield: Tuples of (response, updated_request_info) for each response chunk.
+        :raises: Implementation-specific exceptions for processing failures.
+        """
+
+
+BackendT = TypeVar("BackendT", bound="BackendInterface")
@@ -133,6 +133,7 @@ class Settings(BaseSettings):
     max_concurrency: int = 512
     max_worker_processes: int = 10
     max_add_requests_per_loop: int = 20
+    scheduler_start_delay_non_distributed: float = 0.1
 
     # Data settings
     dataset: DatasetSettings = DatasetSettings()
 
@@ -25,6 +25,7 @@
     ScheduledRequestInfo,
     SchedulerState,
     SchedulerUpdateAction,
+    SchedulerUpdateActionProgress,
 )
 from .scheduler import Scheduler
 from .strategy import (
@@ -41,9 +42,8 @@
     StrategyType,
     SynchronousStrategy,
     ThroughputStrategy,
-    strategy_display_str,
 )
-from .worker import WorkerProcess, worker_sync_iterable_to_async
+from .worker import WorkerProcess
 from .worker_group import WorkerProcessGroup
 
 __all__ = [
@@ -81,13 +81,12 @@
     "Scheduler",
     "SchedulerState",
     "SchedulerUpdateAction",
+    "SchedulerUpdateActionProgress",
     "SchedulingStrategy",
     "StrategyT",
     "StrategyType",
     "SynchronousStrategy",
     "ThroughputStrategy",
     "WorkerProcess",
     "WorkerProcessGroup",
-    "strategy_display_str",
-    "worker_sync_iterable_to_async",
 ]