@@ -1,13 +1,27 @@
-from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator
-from pathlib import Path
-from typing import Any, Literal, Optional, Union
+"""
+Backend interface and registry for generative AI model interactions.
 
-from loguru import logger
-from PIL import Image
+Provides the abstract base class for implementing backends that communicate with
+generative AI models. Backends handle the lifecycle of generation requests.
 
-from guidellm.backend.response import ResponseSummary, StreamingTextResponse
-from guidellm.settings import settings
+Classes:
+    Backend: Abstract base class for generative AI backends with registry support.
+
+Type Aliases:
+    BackendType: Literal type defining supported backend implementations.
+"""
+
+from __future__ import annotations
+
+from abc import abstractmethod
+from typing import Literal
+
+from guidellm.backend.objects import (
+    GenerationRequest,
+    GenerationResponse,
+)
+from guidellm.scheduler import BackendInterface
+from guidellm.utils import RegistryMixin
 
 __all__ = [
     "Backend",
@@ -18,242 +32,88 @@
 BackendType = Literal["openai_http"]
 
 
-class Backend(ABC):
+class Backend(
+    RegistryMixin["type[Backend]"],
+    BackendInterface[GenerationRequest, GenerationResponse],
+):
     """
-    Abstract base class for generative AI backends.
-
-    This class provides a common interface for creating and interacting with different
-    generative AI backends. Subclasses should implement the abstract methods to
-    define specific backend behavior.
-
-    :cvar _registry: A registration dictionary that maps BackendType to backend classes.
-    :param type_: The type of the backend.
+    Base class for generative AI backends with registry and lifecycle.
+
+    Provides a standard interface for backends that communicate with generative AI
+    models. Combines the registry pattern for automatic discovery with a defined
+    lifecycle for process-based distributed execution.
+
+    Backend lifecycle phases:
+    1. Creation and configuration
+    2. Process startup - Initialize resources in worker process
+    3. Validation - Verify backend readiness
+    4. Request resolution - Process generation requests
+    5. Process shutdown - Clean up resources
+
+    Backend state (excluding process_startup resources) must be pickleable for
+    distributed execution across process boundaries.
+
+    Example:
+    ::
+        @Backend.register("my_backend")
+        class MyBackend(Backend):
+            def __init__(self, api_key: str):
+                super().__init__("my_backend")
+                self.api_key = api_key
+
+            async def process_startup(self):
+                self.client = MyAPIClient(self.api_key)
+
+        backend = Backend.create("my_backend", api_key="secret")
     """
 
-    _registry: dict[BackendType, "type[Backend]"] = {}
-
-    @classmethod
-    def register(cls, backend_type: BackendType):
-        """
-        A decorator to register a backend class in the backend registry.
-
-        :param backend_type: The type of backend to register.
-        :type backend_type: BackendType
-        :return: The decorated backend class.
-        :rtype: Type[Backend]
-        """
-        if backend_type in cls._registry:
-            raise ValueError(f"Backend type already registered: {backend_type}")
-
-        if not issubclass(cls, Backend):
-            raise TypeError("Only subclasses of Backend can be registered")
-
-        def inner_wrapper(wrapped_class: type["Backend"]):
-            cls._registry[backend_type] = wrapped_class
-            logger.info("Registered backend type: {}", backend_type)
-            return wrapped_class
-
-        return inner_wrapper
-
     @classmethod
-    def create(cls, type_: BackendType, **kwargs) -> "Backend":
+    def create(cls, type_: BackendType, **kwargs) -> Backend:
         """
-        Factory method to create a backend instance based on the backend type.
+        Create a backend instance based on the backend type.
 
         :param type_: The type of backend to create.
-        :type type_: BackendType
         :param kwargs: Additional arguments for backend initialization.
         :return: An instance of a subclass of Backend.
-        :rtype: Backend
         :raises ValueError: If the backend type is not registered.
         """
 
-        logger.info("Creating backend of type {}", type_)
+        backend = cls.get_registered_object(type_)
 
-        if type_ not in cls._registry:
-            err = ValueError(f"Unsupported backend type: {type_}")
-            logger.error("{}", err)
-            raise err
+        if backend is None:
+            raise ValueError(
+                f"Backend type '{type_}' is not registered. "
+                f"Available types: {list(cls.registry.keys()) if cls.registry else []}"
+            )
 
-        return Backend._registry[type_](**kwargs)
+        return backend(**kwargs)
 
     def __init__(self, type_: BackendType):
-        self._type = type_
-
-    @property
-    def type_(self) -> BackendType:
         """
-        :return: The type of the backend.
-        """
-        return self._type
+        Initialize a backend instance.
 
-    @property
-    @abstractmethod
-    def target(self) -> str:
-        """
-        :return: The target location for the backend.
+        :param type_: The backend type identifier.
         """
-        ...
+        self.type_ = type_
 
     @property
-    @abstractmethod
-    def model(self) -> Optional[str]:
+    def processes_limit(self) -> int | None:
         """
-        :return: The model used for the backend requests.
+        :return: Maximum number of worker processes supported. None if unlimited.
         """
-        ...
+        return None
 
     @property
-    @abstractmethod
-    def info(self) -> dict[str, Any]:
-        """
-        :return: The information about the backend.
-        """
-        ...
-
-    @abstractmethod
-    async def reset(self) -> None:
+    def requests_limit(self) -> int | None:
         """
-        Reset the connection object. This is useful for backends that
-        reuse connections or have state that needs to be cleared.
+        :return: Maximum number of concurrent requests supported globally.
+            None if unlimited.
         """
-        ...
-
-    async def validate(self):
-        """
-        Handle final setup and validate the backend is ready for use.
-        If not successful, raises the appropriate exception.
-        """
-        logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
-        await self.check_setup()
-        models = await self.available_models()
-        if not models:
-            raise ValueError("No models available for the backend")
-
-        # Use the preferred route defined in the global settings when performing the
-        # validation request. This avoids calling an unavailable endpoint (ie
-        # /v1/completions) when the deployment only supports the chat completions
-        # endpoint.
-        if settings.preferred_route == "chat_completions":
-            async for _ in self.chat_completions(  # type: ignore[attr-defined]
-                content="Test connection", output_token_count=1
-            ):
-                pass
-        else:
-            async for _ in self.text_completions(  # type: ignore[attr-defined]
-                prompt="Test connection", output_token_count=1
-            ):
-                pass
-
-        await self.reset()
-
-    @abstractmethod
-    async def check_setup(self):
-        """
-        Check the setup for the backend.
-        If unsuccessful, raises the appropriate exception.
-
-        :raises ValueError: If the setup check fails.
-        """
-        ...
-
-    @abstractmethod
-    async def prepare_multiprocessing(self):
-        """
-        Prepare the backend for use in a multiprocessing environment.
-        This is useful for backends that have instance state that can not
-        be shared across processes and should be cleared out and re-initialized
-        for each new process.
-        """
-        ...
-
-    @abstractmethod
-    async def available_models(self) -> list[str]:
-        """
-        Get the list of available models for the backend.
-
-        :return: The list of available models.
-        :rtype: List[str]
-        """
-        ...
+        return None
 
     @abstractmethod
-    async def text_completions(
-        self,
-        prompt: Union[str, list[str]],
-        request_id: Optional[str] = None,
-        prompt_token_count: Optional[int] = None,
-        output_token_count: Optional[int] = None,
-        **kwargs,
-    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
+    async def default_model(self) -> str | None:
         """
-        Generate text only completions for the given prompt.
-        Does not support multiple modalities, complicated chat interfaces,
-        or chat templates. Specifically, it requests with only the prompt.
-
-        :param prompt: The prompt (or list of prompts) to generate a completion for.
-            If a list is supplied, these are concatenated and run through the model
-            for a single prompt.
-        :param request_id: The unique identifier for the request, if any.
-            Added to logging statements and the response for tracking purposes.
-        :param prompt_token_count: The number of tokens measured in the prompt, if any.
-            Returned in the response stats for later analysis, if applicable.
-        :param output_token_count: If supplied, the number of tokens to enforce
-            generation of for the output for this request.
-        :param kwargs: Additional keyword arguments to pass with the request.
-        :return: An async generator that yields a StreamingTextResponse for start,
-            a StreamingTextResponse for each received iteration,
-            and a ResponseSummary for the final response.
-        """
-        ...
-
-    @abstractmethod
-    async def chat_completions(
-        self,
-        content: Union[
-            str,
-            list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
-            Any,
-        ],
-        request_id: Optional[str] = None,
-        prompt_token_count: Optional[int] = None,
-        output_token_count: Optional[int] = None,
-        raw_content: bool = False,
-        **kwargs,
-    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
-        """
-        Generate chat completions for the given content.
-        Supports multiple modalities, complicated chat interfaces, and chat templates.
-        Specifically, it requests with the content, which can be any combination of
-        text, images, and audio provided the target model supports it,
-        and returns the output text. Additionally, any chat templates
-        for the model are applied within the backend.
-
-        :param content: The content (or list of content) to generate a completion for.
-            This supports any combination of text, images, and audio (model dependent).
-            Supported text only request examples:
-                content="Sample prompt", content=["Sample prompt", "Second prompt"],
-                content=[{"type": "text", "value": "Sample prompt"}.
-            Supported text and image request examples:
-                content=["Describe the image", PIL.Image.open("image.jpg")],
-                content=["Describe the image", Path("image.jpg")],
-                content=["Describe the image", {"type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
-            Supported text and audio request examples:
-                content=["Transcribe the audio", Path("audio.wav")],
-                content=["Transcribe the audio", {"type": "input_audio",
-                    "input_audio": {"data": f"{base64_bytes}", "format": "wav}].
-            Additionally, if raw_content=True then the content is passed directly to the
-                backend without any processing.
-        :param request_id: The unique identifier for the request, if any.
-            Added to logging statements and the response for tracking purposes.
-        :param prompt_token_count: The number of tokens measured in the prompt, if any.
-            Returned in the response stats for later analysis, if applicable.
-        :param output_token_count: If supplied, the number of tokens to enforce
-            generation of for the output for this request.
-        :param kwargs: Additional keyword arguments to pass with the request.
-        :return: An async generator that yields a StreamingTextResponse for start,
-            a StreamingTextResponse for each received iteration,
-            and a ResponseSummary for the final response.
+        :return: The default model name or identifier for generation requests.
         """
         ...
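For orientation, a minimal concrete backend under the new interface might look like the sketch below. It extends the docstring example and is an assumption-laden illustration, not code from this commit: the "echo" backend type, the EchoClient class, and the import path are hypothetical, and process_shutdown is inferred from phase 5 of the lifecycle list in the class docstring; the request-resolution hook from phase 4 is omitted because its signature does not appear in this diff.

    # Hypothetical sketch only, not code from this commit.
    from __future__ import annotations

    from guidellm.backend import Backend  # assumed import path


    class EchoClient:  # stand-in for a real, non-pickleable API client
        async def close(self) -> None: ...


    @Backend.register("echo")  # RegistryMixin records the class under "echo"
    class EchoBackend(Backend):
        def __init__(self, model: str = "echo-v1"):
            super().__init__("echo")
            # Only pickleable state here, so the instance can cross
            # process boundaries before process_startup runs.
            self.model = model
            self.client: EchoClient | None = None

        async def process_startup(self) -> None:
            # Phase 2: create heavy resources inside the worker process.
            self.client = EchoClient()

        async def process_shutdown(self) -> None:
            # Phase 5 (name inferred from the docstring): release resources.
            if self.client is not None:
                await self.client.close()
                self.client = None

        async def default_model(self) -> str | None:
            return self.model


    # Construction goes through the registry, mirroring create() above:
    # backend = Backend.create("echo", model="echo-v1")

Because Backend now inherits RegistryMixin, @Backend.register(...) and Backend.create(...) replace the hand-rolled _registry dictionary that this commit deletes, with unknown types surfaced via get_registered_object returning None.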