Commit 1b2c185

Backend typing cleanup

1 parent d67119e

5 files changed: +33 -100 lines changed

src/guidellm/backend/backend.py

Lines changed: 3 additions & 86 deletions
@@ -11,16 +11,14 @@
 BackendType: Literal type defining supported backend implementations.
 """

-from abc import abstractmethod
-from collections.abc import AsyncIterator
-from typing import Any, Literal, Optional
+from typing import Literal, Optional

 from guidellm.backend.objects import (
     GenerationRequest,
     GenerationRequestTimings,
     GenerationResponse,
 )
-from guidellm.scheduler import BackendInterface, ScheduledRequestInfo
+from guidellm.scheduler import BackendInterface
 from guidellm.utils.registry import RegistryMixin

 __all__ = [
@@ -37,7 +35,7 @@ class Backend(
     BackendInterface[GenerationRequest, GenerationRequestTimings, GenerationResponse],
 ):
     """
-    Abstract base class for generative AI backends with registry and lifecycle.
+    Base class for generative AI backends with registry and lifecycle.

     Provides a standard interface for backends that communicate with generative AI
     models. Combines the registry pattern for automatic discovery with a defined
@@ -104,84 +102,3 @@ def requests_limit(self) -> Optional[int]:
             None if unlimited.
         """
         return None
-
-    @abstractmethod
-    def info(self) -> dict[str, Any]:
-        """
-        :return: Backend metadata including model any initializaiton and
-            configuration information.
-        """
-        ...
-
-    @abstractmethod
-    async def process_startup(self):
-        """
-        Initialize process-specific resources and connections.
-
-        Called when a backend instance is transferred to a worker process.
-        Creates connections, clients, and other resources required for request
-        processing. Resources created here are process-local and need not be
-        pickleable.
-
-        Must be called before validate() or resolve().
-
-        :raises: Exception if startup fails.
-        """
-        ...
-
-    @abstractmethod
-    async def process_shutdown(self):
-        """
-        Clean up process-specific resources and connections.
-
-        Called when the worker process is shutting down. Cleans up resources
-        created during process_startup(). After this method, validate() and
-        resolve() should not be used.
-        """
-        ...
-
-    @abstractmethod
-    async def validate(self):
-        """
-        Validate backend configuration and readiness.
-
-        Verifies the backend is properly configured and can communicate with the
-        target model service. Should be called after process_startup() and before
-        resolve().
-
-        :raises: Exception if backend is not ready or cannot connect.
-        """
-        ...
-
-    @abstractmethod
-    async def resolve(
-        self,
-        request: GenerationRequest,
-        request_info: ScheduledRequestInfo[GenerationRequestTimings],
-        history: Optional[list[tuple[GenerationRequest, GenerationResponse]]] = None,
-    ) -> AsyncIterator[
-        tuple[GenerationResponse, ScheduledRequestInfo[GenerationRequestTimings]]
-    ]:
-        """
-        Process a generation request and yield progressive responses.
-
-        Processes a generation request through the backend's model service,
-        yielding intermediate responses as generation progresses. The final
-        yielded item contains the complete response and timing data.
-
-        :param request: The generation request with content and parameters.
-        :param request_info: Request tracking information updated with timing
-            and progress metadata during processing.
-        :param history: Optional conversation history for multi-turn requests.
-            Each tuple contains a previous request-response pair.
-        :yields: Tuples of (response, updated_request_info) as generation
-            progresses. Final tuple contains the complete response.
-        """
-        ...
-
-    @abstractmethod
-    async def default_model(self) -> str:
-        """
-        :return: The default model name or identifier for generation requests.
-        """
-        ...
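
With this change, `Backend` no longer re-declares the abstract surface: the contract lives solely on `BackendInterface`, and `Backend` contributes the registry mixin plus the default process/request limits. A rough sketch of what a concrete subclass looks like afterwards (the `MockBackend` class and its canned values are illustrative, not part of this commit; the `GenerationResponse` fields follow the ones visible in the openai.py diff below):

```python
from typing import Any, Optional

from guidellm.backend.backend import Backend
from guidellm.backend.objects import GenerationResponse


class MockBackend(Backend):
    """Illustrative subclass; not part of this commit."""

    def info(self) -> dict[str, Any]:
        return {"model": "mock-model"}

    async def process_startup(self) -> None:
        pass  # open process-local clients/connections here

    async def process_shutdown(self) -> None:
        pass  # release whatever process_startup created

    async def validate(self) -> None:
        pass  # verify the target service is reachable

    async def resolve(self, request, request_info, history=None):
        # Yield (response, request_info) pairs; the final pair carries the
        # complete response, per the resolve contract on BackendInterface.
        response = GenerationResponse(
            request_id=request.request_id,
            request_args={},
            value="mock output",
        )
        yield response, request_info

    async def default_model(self) -> Optional[str]:
        return "mock-model"
```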

src/guidellm/backend/interface.py

Lines changed: 14 additions & 1 deletion
@@ -25,11 +25,13 @@ class BackendInterface(ABC, Generic[RequestT, MeasuredRequestTimingsT, ResponseT
     @abstractmethod
     def processes_limit(self) -> Optional[int]:
         """Maximum worker processes supported, or None if unlimited."""
+        ...

     @property
     @abstractmethod
     def requests_limit(self) -> Optional[int]:
         """Maximum concurrent requests supported, or None if unlimited."""
+        ...

     @abstractmethod
     def info(self) -> dict[str, Any]:
@@ -46,6 +48,7 @@ async def process_startup(self) -> None:

         :raises: Implementation-specific exceptions for startup failures.
         """
+        ...

     @abstractmethod
     async def validate(self) -> None:
@@ -54,6 +57,7 @@ async def validate(self) -> None:

         :raises: Implementation-specific exceptions for validation failures.
         """
+        ...

     @abstractmethod
     async def process_shutdown(self) -> None:
@@ -62,9 +66,10 @@ async def process_shutdown(self) -> None:

         :raises: Implementation-specific exceptions for shutdown failures.
         """
+        ...

     @abstractmethod
-    async def resolve(
+    def resolve(
         self,
         request: RequestT,
         request_info: ScheduledRequestInfo[MeasuredRequestTimingsT],
@@ -79,6 +84,14 @@ async def resolve(
         :yield: Tuples of (response, updated_request_info) for each response chunk.
         :raises: Implementation-specific exceptions for processing failures.
         """
+        ...
+
+    @abstractmethod
+    async def default_model(self) -> Optional[str]:
+        """
+        :return: The default model name or identifier for generation requests.
+        """
+        ...


 BackendT = TypeVar("BackendT", bound="BackendInterface")
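
The `resolve` change from `async def` to `def` is the heart of the typing cleanup: an implementation that uses `yield` inside `async def` is an async generator function, so *calling* it returns an `AsyncIterator` directly, with no `await`. Declaring the abstract method as a plain `def` returning `AsyncIterator[...]` therefore matches the callable type that concrete async-generator implementations actually have. A minimal, self-contained sketch of the pattern (simplified names, not the project's classes):

```python
import asyncio
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator


class Streamer(ABC):
    # Plain `def` returning AsyncIterator: this is the callable type an
    # async-generator implementation actually presents to the type checker.
    @abstractmethod
    def resolve(self, request: str) -> AsyncIterator[str]:
        ...


class EchoStreamer(Streamer):
    # `async def` + `yield` = async generator function; its type is
    # (str) -> AsyncIterator[str], satisfying the abstract signature.
    async def resolve(self, request: str) -> AsyncIterator[str]:
        yield request
        yield request.upper()


async def main() -> None:
    async for chunk in EchoStreamer().resolve("hello"):
        print(chunk)  # "hello", then "HELLO"


if __name__ == "__main__":
    asyncio.run(main())
```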

src/guidellm/backend/openai.py

Lines changed: 12 additions & 12 deletions
@@ -139,7 +139,7 @@ def __init__(
         self._in_process = False
         self._async_client: Optional[httpx.AsyncClient] = None

-    async def info(self) -> dict[str, Any]:
+    def info(self) -> dict[str, Any]:
         """
         :return: Dictionary containing backend configuration details.
         """
@@ -190,7 +190,7 @@ async def process_shutdown(self):
         if not self._in_process:
             raise RuntimeError("Backend not started up for process.")

-        await self._async_client.aclose()
+        await self._async_client.aclose()  # type: ignore [union-attr]
         self._async_client = None
         self._in_process = False
@@ -210,7 +210,7 @@ async def validate(self):
             # Model is set, use /health endpoint as first check
             target = f"{self.target}{self.HEALTH_PATH}"
             headers = self._get_headers()
-            response = await self._async_client.get(target, headers=headers)
+            response = await self._async_client.get(target, headers=headers)  # type: ignore [union-attr]
             response.raise_for_status()

             return
@@ -258,7 +258,7 @@ async def available_models(self) -> list[str]:
         target = f"{self.target}{self.MODELS_PATH}"
         headers = self._get_headers()
         params = self._get_params(self.MODELS_KEY)
-        response = await self._async_client.get(target, headers=headers, params=params)
+        response = await self._async_client.get(target, headers=headers, params=params)  # type: ignore [union-attr]
         response.raise_for_status()

         return [item["id"] for item in response.json()["data"]]
@@ -305,7 +305,7 @@ async def resolve(
             request_id=request.request_id,
             request_args={
                 "request_type": request.request_type,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 **request.params,
             },
             value="",
@@ -324,15 +324,15 @@ async def resolve(
             {
                 "prompt": request.content,
                 "request_id": request.request_id,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 "stream_response": request.params.get("stream", self.stream_response),
                 **request.params,
             }
             if request.request_type == "text_completions"
             else {
                 "content": request.content,
                 "request_id": request.request_id,
-                "output_token_count": request.constraints.get("max_output_tokens"),
+                "output_token_count": request.constraints.get("output_tokens"),
                 "stream_response": request.params.get("stream", self.stream_response),
                 **request.params,
             }
@@ -345,7 +345,7 @@ async def resolve(
             if delta is not None:
                 if request_info.request_timings.first_iteration is None:
                     request_info.request_timings.first_iteration = time.time()
-                response.value += delta
+                response.value += delta  # type: ignore [operator]
                 response.delta = delta
                 request_info.request_timings.last_iteration = time.time()
                 response.iterations += 1
@@ -396,7 +396,7 @@ async def text_completions(
         yield None, None  # Initial yield for async iterator to signal start

         if not stream_response:
-            response = await self._async_client.post(
+            response = await self._async_client.post(  # type: ignore [union-attr]
                 target,
                 headers=headers,
                 params=params,
@@ -411,7 +411,7 @@ async def text_completions(
             return

         body.update({"stream": True, "stream_options": {"include_usage": True}})
-        async with self._async_client.stream(
+        async with self._async_client.stream(  # type: ignore [union-attr]
             "POST",
             target,
             headers=headers,
@@ -474,7 +474,7 @@ async def chat_completions(
         yield None, None  # Initial yield for async iterator to signal start

         if not stream_response:
-            response = await self._async_client.post(
+            response = await self._async_client.post(  # type: ignore [union-attr]
                 target, headers=headers, params=params, json=body
             )
             response.raise_for_status()
@@ -486,7 +486,7 @@ async def chat_completions(
             return

         body.update({"stream": True, "stream_options": {"include_usage": True}})
-        async with self._async_client.stream(
+        async with self._async_client.stream(  # type: ignore [union-attr]
             "POST", target, headers=headers, params=params, json=body
         ) as stream:
             stream.raise_for_status()
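
Most of the churn here is mechanical: `self._async_client` is typed `Optional[httpx.AsyncClient]` and only populated in `process_startup()`, so mypy flags every attribute access on it with `union-attr` (likewise the `operator` ignore on `response.value += delta`, where `value` can be None). The per-line ignores silence this at each call site. An alternative, not adopted by this commit, is a single narrowing accessor; a sketch with a hypothetical `_client()` helper:

```python
from typing import Optional

import httpx


class ClientHolder:
    """Illustrative only; mirrors the Optional-client shape in the backend."""

    def __init__(self) -> None:
        self._async_client: Optional[httpx.AsyncClient] = None

    async def process_startup(self) -> None:
        self._async_client = httpx.AsyncClient()

    def _client(self) -> httpx.AsyncClient:
        # Narrow the Optional once; after this check the return type is
        # httpx.AsyncClient, so call sites need no `type: ignore` comments.
        if self._async_client is None:
            raise RuntimeError("Backend not started up for process.")
        return self._async_client

    async def fetch(self, target: str) -> httpx.Response:
        response = await self._client().get(target)
        response.raise_for_status()
        return response
```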

tests/unit/backend/test_interface.py

Lines changed: 3 additions & 0 deletions
@@ -60,6 +60,9 @@ async def process_shutdown(self) -> None:
             async def resolve(self, request, request_info, history=None):
                 yield request, request_info

+            async def default_model(self) -> str:
+                return "my-model"
+
         # Should be able to instantiate
         backend = MinimalBackend()
         assert backend is not None
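
The test backend needs the new method because `default_model` is now part of the abstract interface: Python's ABC machinery refuses to instantiate a class with any unimplemented `@abstractmethod`. A standalone illustration of that behavior (not the project's classes):

```python
from abc import ABC, abstractmethod


class Iface(ABC):
    @abstractmethod
    async def default_model(self) -> str:
        ...


class Incomplete(Iface):
    pass  # does not implement default_model


try:
    Incomplete()
except TypeError as err:
    # e.g. "Can't instantiate abstract class Incomplete with abstract
    # method default_model"
    print(err)
```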

tests/unit/backend/test_openai_backend.py

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ async def test_openai_backend_info(self):
             target="http://test", model="test-model", timeout=30.0
         )

-        info = await backend.info()
+        info = backend.info()

         assert info["target"] == "http://test"
         assert info["model"] == "test-model"
