Skip to content

Commit 02554b0

Browse files
committed
backend refactor implementations
Signed-off-by: Mark Kurtz <[email protected]>
1 parent a7ae737 commit 02554b0

File tree

10 files changed

+2733
-1560
lines changed

10 files changed

+2733
-1560
lines changed

src/guidellm/backend/__init__.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
1+
"""
2+
Backend infrastructure for GuideLLM language model interactions.
3+
4+
Provides abstract base classes, implemented backends, request/response objects,
5+
and timing utilities for standardized communication with LLM providers.
6+
"""
7+
18
from .backend import (
29
Backend,
310
BackendType,
411
)
5-
from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
6-
from .response import (
7-
RequestArgs,
8-
ResponseSummary,
9-
StreamingResponseType,
10-
StreamingTextResponse,
12+
from .objects import (
13+
GenerationRequest,
14+
GenerationRequestTimings,
15+
GenerationResponse,
1116
)
17+
from .openai import OpenAIHTTPBackend
1218

1319
__all__ = [
14-
"CHAT_COMPLETIONS_PATH",
15-
"TEXT_COMPLETIONS_PATH",
1620
"Backend",
1721
"BackendType",
22+
"GenerationRequest",
23+
"GenerationRequestTimings",
24+
"GenerationResponse",
1825
"OpenAIHTTPBackend",
19-
"RequestArgs",
20-
"ResponseSummary",
21-
"StreamingResponseType",
22-
"StreamingTextResponse",
2326
]

src/guidellm/backend/backend.py

Lines changed: 75 additions & 215 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
1-
from abc import ABC, abstractmethod
2-
from collections.abc import AsyncGenerator
3-
from pathlib import Path
4-
from typing import Any, Literal, Optional, Union
1+
"""
2+
Backend interface and registry for generative AI model interactions.
53
6-
from loguru import logger
7-
from PIL import Image
4+
Provides the abstract base class for implementing backends that communicate with
5+
generative AI models. Backends handle the lifecycle of generation requests.
86
9-
from guidellm.backend.response import ResponseSummary, StreamingTextResponse
10-
from guidellm.settings import settings
7+
Classes:
8+
Backend: Abstract base class for generative AI backends with registry support.
9+
10+
Type Aliases:
11+
BackendType: Literal type defining supported backend implementations.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
from abc import abstractmethod
17+
from typing import Literal
18+
19+
from guidellm.backend.objects import (
20+
GenerationRequest,
21+
GenerationResponse,
22+
)
23+
from guidellm.scheduler import BackendInterface
24+
from guidellm.utils import RegistryMixin
1125

1226
__all__ = [
1327
"Backend",
@@ -18,242 +32,88 @@
1832
BackendType = Literal["openai_http"]
1933

2034

21-
class Backend(ABC):
35+
class Backend(
36+
RegistryMixin["type[Backend]"],
37+
BackendInterface[GenerationRequest, GenerationResponse],
38+
):
2239
"""
23-
Abstract base class for generative AI backends.
24-
25-
This class provides a common interface for creating and interacting with different
26-
generative AI backends. Subclasses should implement the abstract methods to
27-
define specific backend behavior.
28-
29-
:cvar _registry: A registration dictionary that maps BackendType to backend classes.
30-
:param type_: The type of the backend.
40+
Base class for generative AI backends with registry and lifecycle.
41+
42+
Provides a standard interface for backends that communicate with generative AI
43+
models. Combines the registry pattern for automatic discovery with a defined
44+
lifecycle for process-based distributed execution.
45+
46+
Backend lifecycle phases:
47+
1. Creation and configuration
48+
2. Process startup - Initialize resources in worker process
49+
3. Validation - Verify backend readiness
50+
4. Request resolution - Process generation requests
51+
5. Process shutdown - Clean up resources
52+
53+
Backend state (excluding process_startup resources) must be pickleable for
54+
distributed execution across process boundaries.
55+
56+
Example:
57+
::
58+
@Backend.register("my_backend")
59+
class MyBackend(Backend):
60+
def __init__(self, api_key: str):
61+
super().__init__("my_backend")
62+
self.api_key = api_key
63+
64+
async def process_startup(self):
65+
self.client = MyAPIClient(self.api_key)
66+
67+
backend = Backend.create("my_backend", api_key="secret")
3168
"""
3269

33-
_registry: dict[BackendType, "type[Backend]"] = {}
34-
35-
@classmethod
36-
def register(cls, backend_type: BackendType):
37-
"""
38-
A decorator to register a backend class in the backend registry.
39-
40-
:param backend_type: The type of backend to register.
41-
:type backend_type: BackendType
42-
:return: The decorated backend class.
43-
:rtype: Type[Backend]
44-
"""
45-
if backend_type in cls._registry:
46-
raise ValueError(f"Backend type already registered: {backend_type}")
47-
48-
if not issubclass(cls, Backend):
49-
raise TypeError("Only subclasses of Backend can be registered")
50-
51-
def inner_wrapper(wrapped_class: type["Backend"]):
52-
cls._registry[backend_type] = wrapped_class
53-
logger.info("Registered backend type: {}", backend_type)
54-
return wrapped_class
55-
56-
return inner_wrapper
57-
5870
@classmethod
59-
def create(cls, type_: BackendType, **kwargs) -> "Backend":
71+
def create(cls, type_: BackendType, **kwargs) -> Backend:
6072
"""
61-
Factory method to create a backend instance based on the backend type.
73+
Create a backend instance based on the backend type.
6274
6375
:param type_: The type of backend to create.
64-
:type type_: BackendType
6576
:param kwargs: Additional arguments for backend initialization.
6677
:return: An instance of a subclass of Backend.
67-
:rtype: Backend
6878
:raises ValueError: If the backend type is not registered.
6979
"""
7080

71-
logger.info("Creating backend of type {}", type_)
81+
backend = cls.get_registered_object(type_)
7282

73-
if type_ not in cls._registry:
74-
err = ValueError(f"Unsupported backend type: {type_}")
75-
logger.error("{}", err)
76-
raise err
83+
if backend is None:
84+
raise ValueError(
85+
f"Backend type '{type_}' is not registered. "
86+
f"Available types: {list(cls.registry.keys()) if cls.registry else []}"
87+
)
7788

78-
return Backend._registry[type_](**kwargs)
89+
return backend(**kwargs)
7990

8091
def __init__(self, type_: BackendType):
81-
self._type = type_
82-
83-
@property
84-
def type_(self) -> BackendType:
8592
"""
86-
:return: The type of the backend.
87-
"""
88-
return self._type
93+
Initialize a backend instance.
8994
90-
@property
91-
@abstractmethod
92-
def target(self) -> str:
93-
"""
94-
:return: The target location for the backend.
95+
:param type_: The backend type identifier.
9596
"""
96-
...
97+
self.type_ = type_
9798

9899
@property
99-
@abstractmethod
100-
def model(self) -> Optional[str]:
100+
def processes_limit(self) -> int | None:
101101
"""
102-
:return: The model used for the backend requests.
102+
:return: Maximum number of worker processes supported. None if unlimited.
103103
"""
104-
...
104+
return None
105105

106106
@property
107-
@abstractmethod
108-
def info(self) -> dict[str, Any]:
109-
"""
110-
:return: The information about the backend.
111-
"""
112-
...
113-
114-
@abstractmethod
115-
async def reset(self) -> None:
107+
def requests_limit(self) -> int | None:
116108
"""
117-
Reset the connection object. This is useful for backends that
118-
reuse connections or have state that needs to be cleared.
109+
:return: Maximum number of concurrent requests supported globally.
110+
None if unlimited.
119111
"""
120-
...
121-
122-
async def validate(self):
123-
"""
124-
Handle final setup and validate the backend is ready for use.
125-
If not successful, raises the appropriate exception.
126-
"""
127-
logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
128-
await self.check_setup()
129-
models = await self.available_models()
130-
if not models:
131-
raise ValueError("No models available for the backend")
132-
133-
# Use the preferred route defined in the global settings when performing the
134-
# validation request. This avoids calling an unavailable endpoint (ie
135-
# /v1/completions) when the deployment only supports the chat completions
136-
# endpoint.
137-
if settings.preferred_route == "chat_completions":
138-
async for _ in self.chat_completions( # type: ignore[attr-defined]
139-
content="Test connection", output_token_count=1
140-
):
141-
pass
142-
else:
143-
async for _ in self.text_completions( # type: ignore[attr-defined]
144-
prompt="Test connection", output_token_count=1
145-
):
146-
pass
147-
148-
await self.reset()
149-
150-
@abstractmethod
151-
async def check_setup(self):
152-
"""
153-
Check the setup for the backend.
154-
If unsuccessful, raises the appropriate exception.
155-
156-
:raises ValueError: If the setup check fails.
157-
"""
158-
...
159-
160-
@abstractmethod
161-
async def prepare_multiprocessing(self):
162-
"""
163-
Prepare the backend for use in a multiprocessing environment.
164-
This is useful for backends that have instance state that can not
165-
be shared across processes and should be cleared out and re-initialized
166-
for each new process.
167-
"""
168-
...
169-
170-
@abstractmethod
171-
async def available_models(self) -> list[str]:
172-
"""
173-
Get the list of available models for the backend.
174-
175-
:return: The list of available models.
176-
:rtype: List[str]
177-
"""
178-
...
112+
return None
179113

180114
@abstractmethod
181-
async def text_completions(
182-
self,
183-
prompt: Union[str, list[str]],
184-
request_id: Optional[str] = None,
185-
prompt_token_count: Optional[int] = None,
186-
output_token_count: Optional[int] = None,
187-
**kwargs,
188-
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
115+
async def default_model(self) -> str | None:
189116
"""
190-
Generate text only completions for the given prompt.
191-
Does not support multiple modalities, complicated chat interfaces,
192-
or chat templates. Specifically, it requests with only the prompt.
193-
194-
:param prompt: The prompt (or list of prompts) to generate a completion for.
195-
If a list is supplied, these are concatenated and run through the model
196-
for a single prompt.
197-
:param request_id: The unique identifier for the request, if any.
198-
Added to logging statements and the response for tracking purposes.
199-
:param prompt_token_count: The number of tokens measured in the prompt, if any.
200-
Returned in the response stats for later analysis, if applicable.
201-
:param output_token_count: If supplied, the number of tokens to enforce
202-
generation of for the output for this request.
203-
:param kwargs: Additional keyword arguments to pass with the request.
204-
:return: An async generator that yields a StreamingTextResponse for start,
205-
a StreamingTextResponse for each received iteration,
206-
and a ResponseSummary for the final response.
207-
"""
208-
...
209-
210-
@abstractmethod
211-
async def chat_completions(
212-
self,
213-
content: Union[
214-
str,
215-
list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
216-
Any,
217-
],
218-
request_id: Optional[str] = None,
219-
prompt_token_count: Optional[int] = None,
220-
output_token_count: Optional[int] = None,
221-
raw_content: bool = False,
222-
**kwargs,
223-
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
224-
"""
225-
Generate chat completions for the given content.
226-
Supports multiple modalities, complicated chat interfaces, and chat templates.
227-
Specifically, it requests with the content, which can be any combination of
228-
text, images, and audio provided the target model supports it,
229-
and returns the output text. Additionally, any chat templates
230-
for the model are applied within the backend.
231-
232-
:param content: The content (or list of content) to generate a completion for.
233-
This supports any combination of text, images, and audio (model dependent).
234-
Supported text only request examples:
235-
content="Sample prompt", content=["Sample prompt", "Second prompt"],
236-
content=[{"type": "text", "value": "Sample prompt"}].
237-
Supported text and image request examples:
238-
content=["Describe the image", PIL.Image.open("image.jpg")],
239-
content=["Describe the image", Path("image.jpg")],
240-
content=["Describe the image", {"type": "image_url",
241-
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
242-
Supported text and audio request examples:
243-
content=["Transcribe the audio", Path("audio.wav")],
244-
content=["Transcribe the audio", {"type": "input_audio",
245-
"input_audio": {"data": f"{base64_bytes}", "format": "wav"}].
246-
Additionally, if raw_content=True then the content is passed directly to the
247-
backend without any processing.
248-
:param request_id: The unique identifier for the request, if any.
249-
Added to logging statements and the response for tracking purposes.
250-
:param prompt_token_count: The number of tokens measured in the prompt, if any.
251-
Returned in the response stats for later analysis, if applicable.
252-
:param output_token_count: If supplied, the number of tokens to enforce
253-
generation of for the output for this request.
254-
:param kwargs: Additional keyword arguments to pass with the request.
255-
:return: An async generator that yields a StreamingTextResponse for start,
256-
a StreamingTextResponse for each received iteration,
257-
and a ResponseSummary for the final response.
117+
:return: The default model name or identifier for generation requests.
258118
"""
259119
...

0 commit comments

Comments
 (0)