@@ -1,13 +1,27 @@
-from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator
-from pathlib import Path
-from typing import Any, Literal, Optional, Union
+"""
+Backend interface and registry for generative AI model interactions.
 
-from loguru import logger
-from PIL import Image
+Provides the abstract base class for implementing backends that communicate with
+generative AI models. Backends handle the lifecycle of generation requests.
 
-from guidellm.backend.response import ResponseSummary, StreamingTextResponse
-from guidellm.settings import settings
+Classes:
+    Backend: Abstract base class for generative AI backends with registry support.
+
+Type Aliases:
+    BackendType: Literal type defining supported backend implementations.
+"""
+
+from __future__ import annotations
+
+from abc import abstractmethod
+from typing import Literal
+
+from guidellm.backend.objects import (
+    GenerationRequest,
+    GenerationResponse,
+)
+from guidellm.scheduler import BackendInterface
+from guidellm.utils import RegistryMixin
 
 __all__ = [
     "Backend",
@@ -18,242 +32,88 @@
 BackendType = Literal["openai_http"]
 
 
-class Backend(ABC):
+class Backend(
+    RegistryMixin["type[Backend]"],
+    BackendInterface[GenerationRequest, GenerationResponse],
+):
     """
-    Abstract base class for generative AI backends.
-
-    This class provides a common interface for creating and interacting with different
-    generative AI backends. Subclasses should implement the abstract methods to
-    define specific backend behavior.
-
-    :cvar _registry: A registration dictionary that maps BackendType to backend classes.
-    :param type_: The type of the backend.
+    Base class for generative AI backends with registry and lifecycle.
+
+    Provides a standard interface for backends that communicate with generative AI
+    models. Combines the registry pattern for automatic discovery with a defined
+    lifecycle for process-based distributed execution.
+
+    Backend lifecycle phases:
+    1. Creation and configuration
+    2. Process startup - Initialize resources in worker process
+    3. Validation - Verify backend readiness
+    4. Request resolution - Process generation requests
+    5. Process shutdown - Clean up resources
+
+    Backend state (excluding process_startup resources) must be pickleable for
+    distributed execution across process boundaries.
+
+    Example:
+    ::
+        @Backend.register("my_backend")
+        class MyBackend(Backend):
+            def __init__(self, api_key: str):
+                super().__init__("my_backend")
+                self.api_key = api_key
+
+            async def process_startup(self):
+                self.client = MyAPIClient(self.api_key)
+
+        backend = Backend.create("my_backend", api_key="secret")
     """
 
-    _registry: dict[BackendType, "type[Backend]"] = {}
-
-    @classmethod
-    def register(cls, backend_type: BackendType):
-        """
-        A decorator to register a backend class in the backend registry.
-
-        :param backend_type: The type of backend to register.
-        :type backend_type: BackendType
-        :return: The decorated backend class.
-        :rtype: Type[Backend]
-        """
-        if backend_type in cls._registry:
-            raise ValueError(f"Backend type already registered: {backend_type}")
-
-        if not issubclass(cls, Backend):
-            raise TypeError("Only subclasses of Backend can be registered")
-
-        def inner_wrapper(wrapped_class: type["Backend"]):
-            cls._registry[backend_type] = wrapped_class
-            logger.info("Registered backend type: {}", backend_type)
-            return wrapped_class
-
-        return inner_wrapper
-
     @classmethod
-    def create(cls, type_: BackendType, **kwargs) -> "Backend":
+    def create(cls, type_: BackendType, **kwargs) -> Backend:
         """
-        Factory method to create a backend instance based on the backend type.
+        Create a backend instance based on the backend type.
 
         :param type_: The type of backend to create.
-        :type type_: BackendType
         :param kwargs: Additional arguments for backend initialization.
         :return: An instance of a subclass of Backend.
-        :rtype: Backend
         :raises ValueError: If the backend type is not registered.
         """
 
-        logger.info("Creating backend of type {}", type_)
+        backend = cls.get_registered_object(type_)
 
-        if type_ not in cls._registry:
-            err = ValueError(f"Unsupported backend type: {type_}")
-            logger.error("{}", err)
-            raise err
+        if backend is None:
+            raise ValueError(
+                f"Backend type '{type_}' is not registered. "
+                f"Available types: {list(cls.registry.keys()) if cls.registry else []}"
+            )
 
-        return Backend._registry[type_](**kwargs)
+        return backend(**kwargs)
 
     def __init__(self, type_: BackendType):
-        self._type = type_
-
-    @property
-    def type_(self) -> BackendType:
         """
-        :return: The type of the backend.
-        """
-        return self._type
+        Initialize a backend instance.
 
-    @property
-    @abstractmethod
-    def target(self) -> str:
-        """
-        :return: The target location for the backend.
+        :param type_: The backend type identifier.
         """
-        ...
+        self.type_ = type_
 
     @property
-    @abstractmethod
-    def model(self) -> Optional[str]:
+    def processes_limit(self) -> int | None:
         """
-        :return: The model used for the backend requests.
+        :return: Maximum number of worker processes supported. None if unlimited.
         """
-        ...
+        return None
 
     @property
-    @abstractmethod
-    def info(self) -> dict[str, Any]:
-        """
-        :return: The information about the backend.
-        """
-        ...
-
-    @abstractmethod
-    async def reset(self) -> None:
+    def requests_limit(self) -> int | None:
         """
-        Reset the connection object. This is useful for backends that
-        reuse connections or have state that needs to be cleared.
+        :return: Maximum number of concurrent requests supported globally.
+            None if unlimited.
         """
-        ...
-
-    async def validate(self):
-        """
-        Handle final setup and validate the backend is ready for use.
-        If not successful, raises the appropriate exception.
-        """
-        logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
-        await self.check_setup()
-        models = await self.available_models()
-        if not models:
-            raise ValueError("No models available for the backend")
-
-        # Use the preferred route defined in the global settings when performing the
-        # validation request. This avoids calling an unavailable endpoint (ie
-        # /v1/completions) when the deployment only supports the chat completions
-        # endpoint.
-        if settings.preferred_route == "chat_completions":
-            async for _ in self.chat_completions(  # type: ignore[attr-defined]
-                content="Test connection", output_token_count=1
-            ):
-                pass
-        else:
-            async for _ in self.text_completions(  # type: ignore[attr-defined]
-                prompt="Test connection", output_token_count=1
-            ):
-                pass
-
-        await self.reset()
-
-    @abstractmethod
-    async def check_setup(self):
-        """
-        Check the setup for the backend.
-        If unsuccessful, raises the appropriate exception.
-
-        :raises ValueError: If the setup check fails.
-        """
-        ...
-
-    @abstractmethod
-    async def prepare_multiprocessing(self):
-        """
-        Prepare the backend for use in a multiprocessing environment.
-        This is useful for backends that have instance state that can not
-        be shared across processes and should be cleared out and re-initialized
-        for each new process.
-        """
-        ...
-
-    @abstractmethod
-    async def available_models(self) -> list[str]:
-        """
-        Get the list of available models for the backend.
-
-        :return: The list of available models.
-        :rtype: List[str]
-        """
-        ...
+        return None
 
     @abstractmethod
-    async def text_completions(
-        self,
-        prompt: Union[str, list[str]],
-        request_id: Optional[str] = None,
-        prompt_token_count: Optional[int] = None,
-        output_token_count: Optional[int] = None,
-        **kwargs,
-    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
+    async def default_model(self) -> str | None:
         """
-        Generate text only completions for the given prompt.
-        Does not support multiple modalities, complicated chat interfaces,
-        or chat templates. Specifically, it requests with only the prompt.
-
-        :param prompt: The prompt (or list of prompts) to generate a completion for.
-            If a list is supplied, these are concatenated and run through the model
-            for a single prompt.
-        :param request_id: The unique identifier for the request, if any.
-            Added to logging statements and the response for tracking purposes.
-        :param prompt_token_count: The number of tokens measured in the prompt, if any.
-            Returned in the response stats for later analysis, if applicable.
-        :param output_token_count: If supplied, the number of tokens to enforce
-            generation of for the output for this request.
-        :param kwargs: Additional keyword arguments to pass with the request.
-        :return: An async generator that yields a StreamingTextResponse for start,
-            a StreamingTextResponse for each received iteration,
-            and a ResponseSummary for the final response.
-        """
-        ...
-
-    @abstractmethod
-    async def chat_completions(
-        self,
-        content: Union[
-            str,
-            list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
-            Any,
-        ],
-        request_id: Optional[str] = None,
-        prompt_token_count: Optional[int] = None,
-        output_token_count: Optional[int] = None,
-        raw_content: bool = False,
-        **kwargs,
-    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
-        """
-        Generate chat completions for the given content.
-        Supports multiple modalities, complicated chat interfaces, and chat templates.
-        Specifically, it requests with the content, which can be any combination of
-        text, images, and audio provided the target model supports it,
-        and returns the output text. Additionally, any chat templates
-        for the model are applied within the backend.
-
-        :param content: The content (or list of content) to generate a completion for.
-            This supports any combination of text, images, and audio (model dependent).
-            Supported text only request examples:
-                content="Sample prompt", content=["Sample prompt", "Second prompt"],
-                content=[{"type": "text", "value": "Sample prompt"}.
-            Supported text and image request examples:
-                content=["Describe the image", PIL.Image.open("image.jpg")],
-                content=["Describe the image", Path("image.jpg")],
-                content=["Describe the image", {"type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
-            Supported text and audio request examples:
-                content=["Transcribe the audio", Path("audio.wav")],
-                content=["Transcribe the audio", {"type": "input_audio",
-                    "input_audio": {"data": f"{base64_bytes}", "format": "wav}].
-            Additionally, if raw_content=True then the content is passed directly to the
-                backend without any processing.
-        :param request_id: The unique identifier for the request, if any.
-            Added to logging statements and the response for tracking purposes.
-        :param prompt_token_count: The number of tokens measured in the prompt, if any.
-            Returned in the response stats for later analysis, if applicable.
-        :param output_token_count: If supplied, the number of tokens to enforce
-            generation of for the output for this request.
-        :param kwargs: Additional keyword arguments to pass with the request.
-        :return: An async generator that yields a StreamingTextResponse for start,
-            a StreamingTextResponse for each received iteration,
-            and a ResponseSummary for the final response.
+        :return: The default model name or identifier for generation requests.
         """
         ...
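For orientation, a minimal concrete backend under the new interface might look like the sketch below. It extends the docstring example and is an assumption-laden illustration, not code from this commit: the "echo" backend type, the EchoClient class, and the import path are hypothetical, and process_shutdown is inferred from phase 5 of the lifecycle list in the class docstring; the request-resolution hook from phase 4 is omitted because its signature does not appear in this diff.

    # Hypothetical sketch only, not code from this commit.
    from __future__ import annotations

    from guidellm.backend import Backend  # assumed import path


    class EchoClient:  # stand-in for a real, non-pickleable API client
        async def close(self) -> None: ...


    @Backend.register("echo")  # RegistryMixin records the class under "echo"
    class EchoBackend(Backend):
        def __init__(self, model: str = "echo-v1"):
            super().__init__("echo")
            # Only pickleable state here, so the instance can cross
            # process boundaries before process_startup runs.
            self.model = model
            self.client: EchoClient | None = None

        async def process_startup(self) -> None:
            # Phase 2: create heavy resources inside the worker process.
            self.client = EchoClient()

        async def process_shutdown(self) -> None:
            # Phase 5 (name inferred from the docstring): release resources.
            if self.client is not None:
                await self.client.close()
                self.client = None

        async def default_model(self) -> str | None:
            return self.model


    # Construction goes through the registry, mirroring create() above:
    # backend = Backend.create("echo", model="echo-v1")

Because Backend now inherits RegistryMixin, @Backend.register(...) and Backend.create(...) replace the hand-rolled _registry dictionary that this commit deletes, with unknown types surfaced via get_registered_object returning None.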