Skip to content

Commit 4a789bc

Browse files
committed
Finalize refactoring of backend package to work with new scheduler refactor
1 parent 4926284 commit 4a789bc

File tree

4 files changed

+417
-382
lines changed

4 files changed

+417
-382
lines changed

src/guidellm/backend/__init__.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,24 @@
1+
"""
2+
Backend infrastructure for GuideLLM language model interactions.
3+
4+
Provides abstract base classes, implemented backends, request/response objects,
5+
and timing utilities for standardized communication with LLM providers.
6+
"""
7+
18
from .backend import (
29
Backend,
310
BackendType,
411
)
512
from .objects import (
6-
RequestArgs,
7-
ResponseSummary,
8-
StreamingResponseType,
9-
StreamingTextResponse,
13+
GenerationRequest,
14+
GenerationRequestTimings,
15+
GenerationResponse,
1016
)
11-
from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
1217

1318
__all__ = [
14-
"CHAT_COMPLETIONS_PATH",
15-
"TEXT_COMPLETIONS_PATH",
1619
"Backend",
1720
"BackendType",
18-
"OpenAIHTTPBackend",
19-
"RequestArgs",
20-
"ResponseSummary",
21-
"StreamingResponseType",
22-
"StreamingTextResponse",
21+
"GenerationRequest",
22+
"GenerationRequestTimings",
23+
"GenerationResponse",
2324
]

src/guidellm/backend/backend.py

Lines changed: 57 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
"""
22
Backend interface and registry for generative AI model interactions.
33
4-
This module provides the abstract base class and interface for implementing
5-
backends that communicate with generative AI models. Backends handle the
6-
lifecycle of generation requests, including startup, validation, request
7-
processing, and shutdown phases.
4+
Provides the abstract base class for implementing backends that communicate with
5+
generative AI models. Backends handle the lifecycle of generation requests.
86
97
Classes:
108
Backend: Abstract base class for generative AI backends with registry support.
@@ -42,44 +40,38 @@ class Backend(
4240
"""
4341
Abstract base class for generative AI backends with registry and lifecycle.
4442
45-
This class defines the interface for implementing backends that communicate with
46-
generative AI models. It combines the registry pattern for automatic discovery
47-
with a well-defined lifecycle for process-based distributed execution.
43+
Provides a standard interface for backends that communicate with generative AI
44+
models. Combines the registry pattern for automatic discovery with a defined
45+
lifecycle for process-based distributed execution.
4846
49-
The backend lifecycle consists of four main phases:
50-
1. Creation and initial configuration (constructor and factory methods)
51-
2. Process startup - Initialize resources within a worker process
52-
3. Validation - Verify backend readiness and configuration
53-
4. Request resolution - Process generation requests iteratively
54-
5. Process shutdown - Clean up resources when process terminates
47+
Backend lifecycle phases:
48+
1. Creation and configuration
49+
2. Process startup - Initialize resources in worker process
50+
3. Validation - Verify backend readiness
51+
4. Request resolution - Process generation requests
52+
5. Process shutdown - Clean up resources
5553
56-
All backend implementations must ensure that their state (excluding resources
57-
created during process_startup) is pickleable to support transfer across
58-
process boundaries in distributed execution environments.
54+
Backend state (excluding process_startup resources) must be pickleable for
55+
distributed execution across process boundaries.
5956
6057
Example:
6158
::
62-
# Register a custom backend implementation
6359
@Backend.register("my_backend")
6460
class MyBackend(Backend):
6561
def __init__(self, api_key: str):
6662
super().__init__("my_backend")
6763
self.api_key = api_key
6864
6965
async def process_startup(self):
70-
# Initialize process-specific resources
7166
self.client = MyAPIClient(self.api_key)
7267
73-
...
74-
75-
# Create backend instance using factory method
7668
backend = Backend.create("my_backend", api_key="secret")
7769
"""
7870

7971
@classmethod
8072
def create(cls, type_: BackendType, **kwargs) -> "Backend":
8173
"""
82-
Factory method to create a backend instance based on the backend type.
74+
Create a backend instance based on the backend type.
8375
8476
:param type_: The type of backend to create.
8577
:param kwargs: Additional arguments for backend initialization.
@@ -93,65 +85,72 @@ def create(cls, type_: BackendType, **kwargs) -> "Backend":
9385

9486
def __init__(self, type_: BackendType):
9587
"""
96-
Initialize a backend instance with the specified type.
88+
Initialize a backend instance.
9789
98-
:param type_: The backend type identifier for this instance.
90+
:param type_: The backend type identifier.
9991
"""
10092
self.type_ = type_
10193

10294
@property
10395
def processes_limit(self) -> Optional[int]:
10496
"""
105-
:return: The maximum number of worker processes supported by the
106-
backend. None if not limited.
97+
:return: Maximum number of worker processes supported. None if unlimited.
10798
"""
10899
return None
109100

110101
@property
111102
def requests_limit(self) -> Optional[int]:
112103
"""
113-
:return: The maximum number of concurrent requests that can be processed
114-
at once globally by the backend. None if not limited.
104+
:return: Maximum number of concurrent requests supported globally.
105+
None if unlimited.
115106
"""
116107
return None
117108

109+
@abstractmethod
110+
def info(self) -> dict[str, Any]:
111+
"""
112+
:return: Backend metadata including model information, endpoints, and
113+
configuration data for reporting and diagnostics.
114+
"""
115+
...
116+
118117
@abstractmethod
119118
async def process_startup(self):
120119
"""
121120
Initialize process-specific resources and connections.
122121
123-
This method is called when a backend instance is transferred to a worker
124-
process and needs to establish connections, initialize clients, or set up
125-
any other resources required for request processing. All resources created
126-
here are process-local and do not need to be pickleable.
127-
If there are any errors during startup, this method should raise an
128-
appropriate exception.
122+
Called when a backend instance is transferred to a worker process.
123+
Creates connections, clients, and other resources required for request
124+
processing. Resources created here are process-local and need not be
125+
pickleable.
129126
130-
Must be called before validate() or resolve() can be used.
127+
Must be called before validate() or resolve().
128+
129+
:raises Exception: If startup fails.
131130
"""
132131
...
133132

134133
@abstractmethod
135-
async def validate(self):
134+
async def process_shutdown(self):
136135
"""
137-
Validate backend configuration and readiness for request processing.
136+
Clean up process-specific resources and connections.
138137
139-
This method verifies that the backend is properly configured and can
140-
successfully communicate with the target model service. It should be
141-
called after process_startup() and before resolve() to ensure the
142-
backend is ready to handle generation requests.
143-
If the backend cannot connect to the service or is not ready,
144-
this method should raise an appropriate exception.
138+
Called when the worker process is shutting down. Cleans up resources
139+
created during process_startup(). After this method, validate() and
140+
resolve() should not be used.
145141
"""
142+
...
146143

147144
@abstractmethod
148-
async def process_shutdown(self):
145+
async def validate(self):
149146
"""
150-
Clean up process-specific resources and connections.
147+
Validate backend configuration and readiness.
151148
152-
This method is called when the worker process is shutting down and
153-
should clean up any resources created during process_startup(). After
154-
this method is called, validate() and resolve() should not be used.
149+
Verifies the backend is properly configured and can communicate with the
150+
target model service. Should be called after process_startup() and before
151+
resolve().
152+
153+
:raises Exception: If the backend is not ready or cannot connect.
155154
"""
156155
...
157156

@@ -167,37 +166,23 @@ async def resolve(
167166
"""
168167
Process a generation request and yield progressive responses.
169168
170-
This method processes a generation request through the backend's model
171-
service, yielding intermediate responses as the generation progresses.
172-
The final yielded item contains the complete response and timing data.
173-
174-
The request_info parameter is updated with timing metadata and other
175-
tracking information throughout the request processing lifecycle.
169+
Processes a generation request through the backend's model service,
170+
yielding intermediate responses as generation progresses. The final
171+
yielded item contains the complete response and timing data.
176172
177-
:param request: The generation request containing content and parameters.
178-
:param request_info: Request tracking information to be updated with
179-
timing and progress metadata during processing.
173+
:param request: The generation request with content and parameters.
174+
:param request_info: Request tracking information updated with timing
175+
and progress metadata during processing.
180176
:param history: Optional conversation history for multi-turn requests.
181-
Each tuple contains a previous request-response pair that provides
182-
context for the current generation.
183-
:yields: Tuples of (response, updated_request_info) as the generation
184-
progresses. The final tuple contains the complete response.
185-
"""
186-
...
187-
188-
@abstractmethod
189-
async def info(self) -> dict[str, Any]:
190-
"""
191-
:return: Dictionary containing backend metadata such as model
192-
information, service endpoints, version details, and other
193-
configuration data useful for reporting and diagnostics.
177+
Each tuple contains a previous request-response pair.
178+
:yields: Tuples of (response, updated_request_info) as generation
179+
progresses. Final tuple contains the complete response.
194180
"""
195181
...
196182

197183
@abstractmethod
198184
async def default_model(self) -> str:
199185
"""
200-
:return: The model name or identifier that this backend is
201-
configured to use by default for generation requests.
186+
:return: The default model name or identifier for generation requests.
202187
"""
203188
...

src/guidellm/backend/objects.py

Lines changed: 18 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
"""
2-
Backend object models for request and response handling in the GuideLLM toolkit.
2+
Backend object models for request and response handling.
33
4-
This module provides standardized models for generation requests, responses,
5-
and timing information to ensure consistent data handling across different
6-
backend implementations.
7-
8-
Classes:
9-
GenerationRequest: Request model for generation operations with content,
10-
parameters, statistics, and constraints.
11-
GenerationResponse: Response model containing generation results, token
12-
counts, timing information, and error details.
13-
GenerationRequestTimings: Timing model for tracking request lifecycle
14-
events and performance metrics.
4+
Provides standardized models for generation requests, responses, and timing
5+
information to ensure consistent data handling across different backend
6+
implementations.
157
"""
168

179
import uuid
@@ -30,74 +22,51 @@
3022

3123

3224
class GenerationRequest(StandardBaseModel):
33-
"""
34-
Request model for backend generation operations.
35-
36-
Encapsulates all necessary information for performing text or chat completion
37-
requests through backend systems.
38-
"""
25+
"""Request model for backend generation operations."""
3926

4027
request_id: str = Field(
4128
default_factory=lambda: str(uuid.uuid4()),
42-
description="The unique identifier for the request.",
29+
description="Unique identifier for the request.",
4330
)
4431
request_type: Literal["text_completions", "chat_completions"] = Field(
4532
default="text_completions",
4633
description=(
47-
"The type of request (e.g., text, chat). "
48-
"If request_type='text_completions', resolved by backend.text_completions. "
49-
"If request_typ='chat_completions', resolved by backend.chat_completions."
34+
"Type of request. 'text_completions' uses backend.text_completions(), "
35+
"'chat_completions' uses backend.chat_completions()."
5036
),
5137
)
5238
content: Any = Field(
5339
description=(
54-
"The content for the request to send to the backend. "
55-
"For request_type='text_completions', this should be a string or list "
56-
"of strings which will be resolved by backend.text_completions(). "
57-
"For request_type='chat_completions', this should be a string, "
58-
"a list of (str, Dict[str, Union[str, Dict[str, str]]], Path, Image), "
59-
"or raw content which will be resolved by backend.chat_completions(). "
60-
"For raw content, set raw_content=True in the params field."
40+
"Request content. For text_completions: string or list of strings. "
41+
"For chat_completions: string, list of messages, or raw content "
42+
"(set raw_content=True in params)."
6143
)
6244
)
6345
params: dict[str, Any] = Field(
6446
default_factory=dict,
6547
description=(
66-
"Additional parameters passed as kwargs to the backend methods. "
67-
"For HTTP backends, these are included in the request body. "
68-
"Common parameters include max_tokens, temperature, and stream."
48+
"Additional parameters passed to backend methods. "
49+
"Common: max_tokens, temperature, stream."
6950
),
7051
)
7152
stats: dict[Literal["prompt_tokens"], int] = Field(
7253
default_factory=dict,
73-
description=(
74-
"Request statistics including prompt token count. "
75-
"Used for tracking resource usage and performance analysis."
76-
),
54+
description="Request statistics including prompt token count.",
7755
)
7856
constraints: dict[Literal["output_tokens"], int] = Field(
7957
default_factory=dict,
80-
description=(
81-
"Request constraints such as maximum output tokens. "
82-
"Used to control backend generation behavior and resource limits."
83-
),
58+
description="Request constraints such as maximum output tokens.",
8459
)
8560

8661

8762
class GenerationResponse(StandardBaseModel):
88-
"""
89-
Response model for backend generation operations.
90-
91-
Contains the results of a generation request including the generated content,
92-
token usage statistics, iteration counts, and any errors encountered during
93-
processing. Supports both complete responses and streaming delta updates.
94-
"""
63+
"""Response model for backend generation operations."""
9564

9665
request_id: str = Field(
9766
description="Unique identifier matching the original GenerationRequest."
9867
)
9968
request_args: dict[str, Any] = Field(
100-
description="Arguments that were passed to the backend for this request."
69+
description="Arguments passed to the backend for this request."
10170
)
10271
value: Optional[str] = Field(
10372
default=None,
@@ -125,12 +94,7 @@ class GenerationResponse(StandardBaseModel):
12594

12695

12796
class GenerationRequestTimings(RequestTimings):
128-
"""
129-
Timing model for tracking generation request lifecycle events.
130-
131-
Extends the base RequestTimings with generation-specific timing points
132-
including first and last iteration timestamps.
133-
"""
97+
"""Timing model for tracking generation request lifecycle events."""
13498

13599
first_iteration: Optional[float] = Field(
136100
default=None,

0 commit comments

Comments
 (0)