Commit 1b2c185

Backend typing cleanup

1 parent d67119e

5 files changed: +33 -100 lines changed

src/guidellm/backend/backend.py

Lines changed: 3 additions & 86 deletions
@@ -11,16 +11,14 @@
 BackendType: Literal type defining supported backend implementations.
 """

-from abc import abstractmethod
-from collections.abc import AsyncIterator
-from typing import Any, Literal, Optional
+from typing import Literal, Optional

 from guidellm.backend.objects import (
     GenerationRequest,
     GenerationRequestTimings,
     GenerationResponse,
 )
-from guidellm.scheduler import BackendInterface, ScheduledRequestInfo
+from guidellm.scheduler import BackendInterface
 from guidellm.utils.registry import RegistryMixin

 __all__ = [
@@ -37,7 +35,7 @@ class Backend(
     BackendInterface[GenerationRequest, GenerationRequestTimings, GenerationResponse],
 ):
     """
-    Abstract base class for generative AI backends with registry and lifecycle.
+    Base class for generative AI backends with registry and lifecycle.

     Provides a standard interface for backends that communicate with generative AI
     models. Combines the registry pattern for automatic discovery with a defined
@@ -104,84 +102,3 @@ def requests_limit(self) -> Optional[int]:
             None if unlimited.
         """
         return None
-
-    @abstractmethod
-    def info(self) -> dict[str, Any]:
-        """
-        :return: Backend metadata including model any initializaiton and
-            configuration information.
-        """
-        ...
-
-    @abstractmethod
-    async def process_startup(self):
-        """
-        Initialize process-specific resources and connections.
-
-        Called when a backend instance is transferred to a worker process.
-        Creates connections, clients, and other resources required for request
-        processing. Resources created here are process-local and need not be
-        pickleable.
-
-        Must be called before validate() or resolve().
-
-        :raises: Exception if startup fails.
-        """
-        ...
-
-    @abstractmethod
-    async def process_shutdown(self):
-        """
-        Clean up process-specific resources and connections.
-
-        Called when the worker process is shutting down. Cleans up resources
-        created during process_startup(). After this method, validate() and
-        resolve() should not be used.
-        """
-        ...
-
-    @abstractmethod
-    async def validate(self):
-        """
-        Validate backend configuration and readiness.
-
-        Verifies the backend is properly configured and can communicate with the
-        target model service. Should be called after process_startup() and before
-        resolve().
-
-        :raises: Exception if backend is not ready or cannot connect.
-        """
-        ...
-
-    @abstractmethod
-    async def resolve(
-        self,
-        request: GenerationRequest,
-        request_info: ScheduledRequestInfo[GenerationRequestTimings],
-        history: Optional[list[tuple[GenerationRequest, GenerationResponse]]] = None,
-    ) -> AsyncIterator[
-        tuple[GenerationResponse, ScheduledRequestInfo[GenerationRequestTimings]]
-    ]:
-        """
-        Process a generation request and yield progressive responses.
-
-        Processes a generation request through the backend's model service,
-        yielding intermediate responses as generation progresses. The final
-        yielded item contains the complete response and timing data.
-
-        :param request: The generation request with content and parameters.
-        :param request_info: Request tracking information updated with timing
-            and progress metadata during processing.
-        :param history: Optional conversation history for multi-turn requests.
-            Each tuple contains a previous request-response pair.
-        :yields: Tuples of (response, updated_request_info) as generation
-            progresses. Final tuple contains the complete response.
-        """
-        ...
-
-    @abstractmethod
-    async def default_model(self) -> str:
-        """
-        :return: The default model name or identifier for generation requests.
-        """
-        ...
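
With this change, `Backend` no longer re-declares the abstract surface: the contract lives solely on `BackendInterface`, and `Backend` contributes the registry mixin plus the default process/request limits. A rough sketch of what a concrete subclass looks like afterwards (the `MockBackend` class and its canned values are illustrative, not part of this commit; the `GenerationResponse` fields follow the ones visible in the openai.py diff below):

```python
from typing import Any, Optional

from guidellm.backend.backend import Backend
from guidellm.backend.objects import GenerationResponse


class MockBackend(Backend):
    """Illustrative subclass; not part of this commit."""

    def info(self) -> dict[str, Any]:
        return {"model": "mock-model"}

    async def process_startup(self) -> None:
        pass  # open process-local clients/connections here

    async def process_shutdown(self) -> None:
        pass  # release whatever process_startup created

    async def validate(self) -> None:
        pass  # verify the target service is reachable

    async def resolve(self, request, request_info, history=None):
        # Yield (response, request_info) pairs; the final pair carries the
        # complete response, per the resolve contract on BackendInterface.
        response = GenerationResponse(
            request_id=request.request_id,
            request_args={},
            value="mock output",
        )
        yield response, request_info

    async def default_model(self) -> Optional[str]:
        return "mock-model"
```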

src/guidellm/backend/interface.py

Lines changed: 14 additions & 1 deletion
@@ -25,11 +25,13 @@ class BackendInterface(ABC, Generic[RequestT, MeasuredRequestTimingsT, ResponseT
     @abstractmethod
     def processes_limit(self) -> Optional[int]:
         """Maximum worker processes supported, or None if unlimited."""
+        ...

     @property
     @abstractmethod
     def requests_limit(self) -> Optional[int]:
         """Maximum concurrent requests supported, or None if unlimited."""
+        ...

     @abstractmethod
     def info(self) -> dict[str, Any]:
@@ -46,6 +48,7 @@ async def process_startup(self) -> None:

         :raises: Implementation-specific exceptions for startup failures.
         """
+        ...

     @abstractmethod
     async def validate(self) -> None:
@@ -54,6 +57,7 @@ async def validate(self) -> None:

         :raises: Implementation-specific exceptions for validation failures.
         """
+        ...

     @abstractmethod
     async def process_shutdown(self) -> None:
@@ -62,9 +66,10 @@ async def process_shutdown(self) -> None:

         :raises: Implementation-specific exceptions for shutdown failures.
         """
+        ...

     @abstractmethod
-    async def resolve(
+    def resolve(
         self,
         request: RequestT,
         request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
@@ -79,6 +84,14 @@ async def resolve(
         :yield: Tuples of (response, updated_request_info) for each response chunk.
         :raises: Implementation-specific exceptions for processing failures.
         """
+        ...
+
+    @abstractmethod
+    async def default_model(self) -> Optional[str]:
+        """
+        :return: The default model name or identifier for generation requests.
+        """
+        ...


 BackendT = TypeVar("BackendT", bound="BackendInterface")
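
The `resolve` change from `async def` to `def` is the heart of the typing cleanup: an implementation that uses `yield` inside `async def` is an async generator function, so *calling* it returns an `AsyncIterator` directly, with no `await`. Declaring the abstract method as a plain `def` returning `AsyncIterator[...]` therefore matches the callable type that concrete async-generator implementations actually have. A minimal, self-contained sketch of the pattern (simplified names, not the project's classes):

```python
import asyncio
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator


class Streamer(ABC):
    # Plain `def` returning AsyncIterator: this is the callable type an
    # async-generator implementation actually presents to the type checker.
    @abstractmethod
    def resolve(self, request: str) -> AsyncIterator[str]:
        ...


class EchoStreamer(Streamer):
    # `async def` + `yield` = async generator function; its type is
    # (str) -> AsyncIterator[str], satisfying the abstract signature.
    async def resolve(self, request: str) -> AsyncIterator[str]:
        yield request
        yield request.upper()


async def main() -> None:
    async for chunk in EchoStreamer().resolve("hello"):
        print(chunk)  # "hello", then "HELLO"


if __name__ == "__main__":
    asyncio.run(main())
```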

src/guidellm/backend/openai.py

Lines changed: 12 additions & 12 deletions
@@ -139,7 +139,7 @@ def __init__(
         self._in_process = False
         self._async_client: Optional[httpx.AsyncClient] = None

-    async def info(self) -> dict[str, Any]:
+    def info(self) -> dict[str, Any]:
         """
         :return: Dictionary containing backend configuration details.
         """
@@ -190,7 +190,7 @@ async def process_shutdown(self):
         if not self._in_process:
             raise RuntimeError("Backend not started up for process.")

-        await self._async_client.aclose()
+        await self._async_client.aclose()  # type: ignore [union-attr]
         self._async_client = None
         self._in_process = False
@@ -210,7 +210,7 @@ async def validate(self):
             # Model is set, use /health endpoint as first check
             target = f"{self.target}{self.HEALTH_PATH}"
             headers = self._get_headers()
-            response = await self._async_client.get(target, headers=headers)
+            response = await self._async_client.get(target, headers=headers)  # type: ignore [union-attr]
             response.raise_for_status()

             return
@@ -258,7 +258,7 @@ async def available_models(self) -> list[str]:
         target = f"{self.target}{self.MODELS_PATH}"
         headers = self._get_headers()
         params = self._get_params(self.MODELS_KEY)
-        response = await self._async_client.get(target, headers=headers, params=params)
+        response = await self._async_client.get(target, headers=headers, params=params)  # type: ignore [union-attr]
         response.raise_for_status()

         return [item["id"] for item in response.json()["data"]]
@@ -305,7 +305,7 @@ async def resolve(
             request_id=request.request_id,
             request_args={
                 "request_type": request.request_type,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 **request.params,
             },
             value="",
@@ -324,15 +324,15 @@ async def resolve(
             {
                 "prompt": request.content,
                 "request_id": request.request_id,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 "stream_response": request.params.get("stream", self.stream_response),
                 **request.params,
             }
             if request.request_type == "text_completions"
             else {
                 "content": request.content,
                 "request_id": request.request_id,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 "stream_response": request.params.get("stream", self.stream_response),
                 **request.params,
             }
@@ -345,7 +345,7 @@ async def resolve(
             if delta is not None:
                 if request_info.request_timings.first_iteration is None:
                     request_info.request_timings.first_iteration = time.time()
-                response.value += delta
+                response.value += delta  # type: ignore [operator]
                 response.delta = delta
                 request_info.request_timings.last_iteration = time.time()
                 response.iterations += 1
@@ -396,7 +396,7 @@ async def text_completions(
         yield None, None  # Initial yield for async iterator to signal start

         if not stream_response:
-            response = await self._async_client.post(
+            response = await self._async_client.post(  # type: ignore [union-attr]
                 target,
                 headers=headers,
                 params=params,
@@ -411,7 +411,7 @@ async def text_completions(
             return

         body.update({"stream": True, "stream_options": {"include_usage": True}})
-        async with self._async_client.stream(
+        async with self._async_client.stream(  # type: ignore [union-attr]
             "POST",
             target,
             headers=headers,
@@ -474,7 +474,7 @@ async def chat_completions(
         yield None, None  # Initial yield for async iterator to signal start

         if not stream_response:
-            response = await self._async_client.post(
+            response = await self._async_client.post(  # type: ignore [union-attr]
                 target, headers=headers, params=params, json=body
             )
             response.raise_for_status()
@@ -486,7 +486,7 @@ async def chat_completions(
             return

         body.update({"stream": True, "stream_options": {"include_usage": True}})
-        async with self._async_client.stream(
+        async with self._async_client.stream(  # type: ignore [union-attr]
             "POST", target, headers=headers, params=params, json=body
         ) as stream:
             stream.raise_for_status()
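
Most of the churn here is mechanical: `self._async_client` is typed `Optional[httpx.AsyncClient]` and only populated in `process_startup()`, so mypy flags every attribute access on it with `union-attr` (likewise the `operator` ignore on `response.value += delta`, where `value` can be None). The per-line ignores silence this at each call site. An alternative, not adopted by this commit, is a single narrowing accessor; a sketch with a hypothetical `_client()` helper:

```python
from typing import Optional

import httpx


class ClientHolder:
    """Illustrative only; mirrors the Optional-client shape in the backend."""

    def __init__(self) -> None:
        self._async_client: Optional[httpx.AsyncClient] = None

    async def process_startup(self) -> None:
        self._async_client = httpx.AsyncClient()

    def _client(self) -> httpx.AsyncClient:
        # Narrow the Optional once; after this check the return type is
        # httpx.AsyncClient, so call sites need no `type: ignore` comments.
        if self._async_client is None:
            raise RuntimeError("Backend not started up for process.")
        return self._async_client

    async def fetch(self, target: str) -> httpx.Response:
        response = await self._client().get(target)
        response.raise_for_status()
        return response
```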

tests/unit/backend/test_interface.py

Lines changed: 3 additions & 0 deletions
@@ -60,6 +60,9 @@ async def process_shutdown(self) -> None:
             async def resolve(self, request, request_info, history=None):
                 yield request, request_info

+            async def default_model(self) -> str:
+                return "my-model"
+
         # Should be able to instantiate
         backend = MinimalBackend()
         assert backend is not None
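
The test backend needs the new method because `default_model` is now part of the abstract interface: Python's ABC machinery refuses to instantiate a class with any unimplemented `@abstractmethod`. A standalone illustration of that behavior (not the project's classes):

```python
from abc import ABC, abstractmethod


class Iface(ABC):
    @abstractmethod
    async def default_model(self) -> str:
        ...


class Incomplete(Iface):
    pass  # does not implement default_model


try:
    Incomplete()
except TypeError as err:
    # e.g. "Can't instantiate abstract class Incomplete with abstract
    # method default_model"
    print(err)
```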

tests/unit/backend/test_openai_backend.py

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ async def test_openai_backend_info(self):
             target="http://test", model="test-model", timeout=30.0
         )

-        info = await backend.info()
+        info = backend.info()

         assert info["target"] == "http://test"
         assert info["model"] == "test-model"
