Finalize refactoring of backend package to work with new scheduler refactor

markurtz · markurtz · commit 4a789bcf0ee4 · 2025-08-01T08:53:18.000-04:00
diff --git a/src/guidellm/backend/__init__.py b/src/guidellm/backend/__init__.py
@@ -1,23 +1,24 @@
+"""
+Backend infrastructure for GuideLLM language model interactions.
+
+Provides abstract base classes, implemented backends, request/response objects,
+and timing utilities for standardized communication with LLM providers.
+"""
+
 from .backend import (
     Backend,
     BackendType,
 )
 from .objects import (
-    RequestArgs,
-    ResponseSummary,
-    StreamingResponseType,
-    StreamingTextResponse,
+    GenerationRequest,
+    GenerationRequestTimings,
+    GenerationResponse,
 )
-from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
 
 __all__ = [
-    "CHAT_COMPLETIONS_PATH",
-    "TEXT_COMPLETIONS_PATH",
     "Backend",
     "BackendType",
-    "OpenAIHTTPBackend",
-    "RequestArgs",
-    "ResponseSummary",
-    "StreamingResponseType",
-    "StreamingTextResponse",
+    "GenerationRequest",
+    "GenerationRequestTimings",
+    "GenerationResponse",
 ]
diff --git a/src/guidellm/backend/backend.py b/src/guidellm/backend/backend.py
@@ -1,10 +1,8 @@
 """
 Backend interface and registry for generative AI model interactions.
 
-This module provides the abstract base class and interface for implementing
-backends that communicate with generative AI models. Backends handle the
-lifecycle of generation requests, including startup, validation, request
-processing, and shutdown phases.
+Provides the abstract base class for implementing backends that communicate with
+generative AI models. Backends handle the lifecycle of generation requests.
 
 Classes:
     Backend: Abstract base class for generative AI backends with registry support.
@@ -42,44 +40,38 @@ class Backend(
     """
     Abstract base class for generative AI backends with registry and lifecycle.
 
-    This class defines the interface for implementing backends that communicate with
-    generative AI models. It combines the registry pattern for automatic discovery
-    with a well-defined lifecycle for process-based distributed execution.
+    Provides a standard interface for backends that communicate with generative AI
+    models. Combines the registry pattern for automatic discovery with a defined
+    lifecycle for process-based distributed execution.
 
-    The backend lifecycle consists of four main phases:
-    1. Creation and initial configuration (constructor and factory methods)
-    2. Process startup - Initialize resources within a worker process
-    3. Validation - Verify backend readiness and configuration
-    4. Request resolution - Process generation requests iteratively
-    5. Process shutdown - Clean up resources when process terminates
+    Backend lifecycle phases:
+    1. Creation and configuration
+    2. Process startup - Initialize resources in worker process
+    3. Validation - Verify backend readiness
+    4. Request resolution - Process generation requests
+    5. Process shutdown - Clean up resources
 
-    All backend implementations must ensure that their state (excluding resources
-    created during process_startup) is pickleable to support transfer across
-    process boundaries in distributed execution environments.
+    Backend state (excluding process_startup resources) must be pickleable for
+    distributed execution across process boundaries.
 
     Example:
     ::
-        # Register a custom backend implementation
         @Backend.register("my_backend")
         class MyBackend(Backend):
             def __init__(self, api_key: str):
                 super().__init__("my_backend")
                 self.api_key = api_key
 
             async def process_startup(self):
-                # Initialize process-specific resources
                 self.client = MyAPIClient(self.api_key)
 
-            ...
-
-        # Create backend instance using factory method
         backend = Backend.create("my_backend", api_key="secret")
     """
 
     @classmethod
     def create(cls, type_: BackendType, **kwargs) -> "Backend":
         """
-        Factory method to create a backend instance based on the backend type.
+        Create a backend instance based on the backend type.
 
         :param type_: The type of backend to create.
         :param kwargs: Additional arguments for backend initialization.
@@ -93,65 +85,72 @@ def create(cls, type_: BackendType, **kwargs) -> "Backend":
 
     def __init__(self, type_: BackendType):
         """
-        Initialize a backend instance with the specified type.
+        Initialize a backend instance.
 
-        :param type_: The backend type identifier for this instance.
+        :param type_: The backend type identifier.
         """
         self.type_ = type_
 
     @property
     def processes_limit(self) -> Optional[int]:
         """
-        :return: The maximum number of worker processes supported by the
-            backend. None if not limited.
+        :return: Maximum number of worker processes supported. None if unlimited.
         """
         return None
 
     @property
     def requests_limit(self) -> Optional[int]:
         """
-        :return: The maximum number of concurrent requests that can be processed
-            at once globally by the backend. None if not limited.
+        :return: Maximum number of concurrent requests supported globally.
+            None if unlimited.
         """
         return None
 
+    @abstractmethod
+    def info(self) -> dict[str, Any]:
+        """
+        :return: Backend metadata including model information, endpoints, and
+            configuration data for reporting and diagnostics.
+        """
+        ...
+
     @abstractmethod
     async def process_startup(self):
         """
         Initialize process-specific resources and connections.
 
-        This method is called when a backend instance is transferred to a worker
-        process and needs to establish connections, initialize clients, or set up
-        any other resources required for request processing. All resources created
-        here are process-local and do not need to be pickleable.
-        If there are any errors during startup, this method should raise an
-        appropriate exception.
+        Called when a backend instance is transferred to a worker process.
+        Creates connections, clients, and other resources required for request
+        processing. Resources created here are process-local and need not be
+        pickleable.
 
-        Must be called before validate() or resolve() can be used.
+        Must be called before validate() or resolve().
+
+        :raises Exception: If startup fails.
         """
         ...
 
     @abstractmethod
-    async def validate(self):
+    async def process_shutdown(self):
         """
-        Validate backend configuration and readiness for request processing.
+        Clean up process-specific resources and connections.
 
-        This method verifies that the backend is properly configured and can
-        successfully communicate with the target model service. It should be
-        called after process_startup() and before resolve() to ensure the
-        backend is ready to handle generation requests.
-        If the backend cannot connect to the service or is not ready,
-        this method should raise an appropriate exception.
+        Called when the worker process is shutting down. Cleans up resources
+        created during process_startup(). After this method returns,
+        validate() and resolve() should not be used.
         """
+        ...
 
     @abstractmethod
-    async def process_shutdown(self):
+    async def validate(self):
         """
-        Clean up process-specific resources and connections.
+        Validate backend configuration and readiness.
 
-        This method is called when the worker process is shutting down and
-        should clean up any resources created during process_startup(). After
-        this method is called, validate() and resolve() should not be used.
+        Verifies the backend is properly configured and can communicate with the
+        target model service. Should be called after process_startup() and before
+        resolve().
+
+        :raises Exception: If the backend is not ready or cannot connect.
         """
         ...
 
@@ -167,37 +166,23 @@ async def resolve(
         """
         Process a generation request and yield progressive responses.
 
-        This method processes a generation request through the backend's model
-        service, yielding intermediate responses as the generation progresses.
-        The final yielded item contains the complete response and timing data.
-
-        The request_info parameter is updated with timing metadata and other
-        tracking information throughout the request processing lifecycle.
+        Processes a generation request through the backend's model service,
+        yielding intermediate responses as generation progresses. The final
+        yielded item contains the complete response and timing data.
 
-        :param request: The generation request containing content and parameters.
-        :param request_info: Request tracking information to be updated with
-            timing and progress metadata during processing.
+        :param request: The generation request with content and parameters.
+        :param request_info: Request tracking information updated with timing
+            and progress metadata during processing.
         :param history: Optional conversation history for multi-turn requests.
-            Each tuple contains a previous request-response pair that provides
-            context for the current generation.
-        :yields: Tuples of (response, updated_request_info) as the generation
-            progresses. The final tuple contains the complete response.
-        """
-        ...
-
-    @abstractmethod
-    async def info(self) -> dict[str, Any]:
-        """
-        :return: Dictionary containing backend metadata such as model
-            information, service endpoints, version details, and other
-            configuration data useful for reporting and diagnostics.
+            Each tuple contains a previous request-response pair.
+        :yields: Tuples of (response, updated_request_info) as generation
+            progresses. Final tuple contains the complete response.
         """
         ...
 
     @abstractmethod
     async def default_model(self) -> str:
         """
-        :return: The model name or identifier that this backend is
-            configured to use by default for generation requests.
+        :return: The default model name or identifier for generation requests.
         """
         ...
diff --git a/src/guidellm/backend/objects.py b/src/guidellm/backend/objects.py
@@ -1,17 +1,9 @@
 """
-Backend object models for request and response handling in the GuideLLM toolkit.
+Backend object models for request and response handling.
 
-This module provides standardized models for generation requests, responses,
-and timing information to ensure consistent data handling across different
-backend implementations.
-
-Classes:
-    GenerationRequest: Request model for generation operations with content,
-        parameters, statistics, and constraints.
-    GenerationResponse: Response model containing generation results, token
-        counts, timing information, and error details.
-    GenerationRequestTimings: Timing model for tracking request lifecycle
-        events and performance metrics.
+Provides standardized models for generation requests, responses, and timing
+information to ensure consistent data handling across different backend
+implementations.
 """
 
 import uuid
@@ -30,74 +22,51 @@
 
 
 class GenerationRequest(StandardBaseModel):
-    """
-    Request model for backend generation operations.
-
-    Encapsulates all necessary information for performing text or chat completion
-    requests through backend systems.
-    """
+    """Request model for backend generation operations."""
 
     request_id: str = Field(
         default_factory=lambda: str(uuid.uuid4()),
-        description="The unique identifier for the request.",
+        description="Unique identifier for the request.",
     )
     request_type: Literal["text_completions", "chat_completions"] = Field(
         default="text_completions",
         description=(
-            "The type of request (e.g., text, chat). "
-            "If request_type='text_completions', resolved by backend.text_completions. "
-            "If request_typ='chat_completions', resolved by backend.chat_completions."
+            "Type of request. 'text_completions' uses backend.text_completions(), "
+            "'chat_completions' uses backend.chat_completions()."
         ),
     )
     content: Any = Field(
         description=(
-            "The content for the request to send to the backend. "
-            "For request_type='text_completions', this should be a string or list "
-            "of strings which will be resolved by backend.text_completions(). "
-            "For request_type='chat_completions', this should be a string, "
-            "a list of (str, Dict[str, Union[str, Dict[str, str]]], Path, Image), "
-            "or raw content which will be resolved by backend.chat_completions(). "
-            "For raw content, set raw_content=True in the params field."
+            "Request content. For text_completions: string or list of strings. "
+            "For chat_completions: string, list of messages, or raw content "
+            "(set raw_content=True in params)."
         )
     )
     params: dict[str, Any] = Field(
         default_factory=dict,
         description=(
-            "Additional parameters passed as kwargs to the backend methods. "
-            "For HTTP backends, these are included in the request body. "
-            "Common parameters include max_tokens, temperature, and stream."
+            "Additional parameters passed to backend methods. "
+            "Common: max_tokens, temperature, stream."
         ),
     )
     stats: dict[Literal["prompt_tokens"], int] = Field(
         default_factory=dict,
-        description=(
-            "Request statistics including prompt token count. "
-            "Used for tracking resource usage and performance analysis."
-        ),
+        description="Request statistics including prompt token count.",
     )
     constraints: dict[Literal["output_tokens"], int] = Field(
         default_factory=dict,
-        description=(
-            "Request constraints such as maximum output tokens. "
-            "Used to control backend generation behavior and resource limits."
-        ),
+        description="Request constraints such as maximum output tokens.",
     )
 
 
 class GenerationResponse(StandardBaseModel):
-    """
-    Response model for backend generation operations.
-
-    Contains the results of a generation request including the generated content,
-    token usage statistics, iteration counts, and any errors encountered during
-    processing. Supports both complete responses and streaming delta updates.
-    """
+    """Response model for backend generation operations."""
 
     request_id: str = Field(
         description="Unique identifier matching the original GenerationRequest."
     )
     request_args: dict[str, Any] = Field(
-        description="Arguments that were passed to the backend for this request."
+        description="Arguments passed to the backend for this request."
     )
     value: Optional[str] = Field(
         default=None,
@@ -125,12 +94,7 @@ class GenerationResponse(StandardBaseModel):
 
 
 class GenerationRequestTimings(RequestTimings):
-    """
-    Timing model for tracking generation request lifecycle events.
-
-    Extends the base RequestTimings with generation-specific timing points
-    including first and last iteration timestamps.
-    """
+    """Timing model for tracking generation request lifecycle events."""
 
     first_iteration: Optional[float] = Field(
         default=None,
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py