Skip to content

Commit 4926284

Browse files
committed
Initial refactoring of backend and generation objects to work with new scheduler refactor
1 parent a5ccd70 commit 4926284

File tree

16 files changed

+1345
-1192
lines changed

16 files changed

+1345
-1192
lines changed

src/guidellm/backend/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
Backend,
33
BackendType,
44
)
5-
from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
6-
from .response import (
5+
from .objects import (
76
RequestArgs,
87
ResponseSummary,
98
StreamingResponseType,
109
StreamingTextResponse,
1110
)
11+
from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
1212

1313
__all__ = [
1414
"CHAT_COMPLETIONS_PATH",

src/guidellm/backend/backend.py

Lines changed: 134 additions & 190 deletions
Large diffs are not rendered by default.

src/guidellm/backend/objects.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
"""
2+
Backend object models for request and response handling in the GuideLLM toolkit.
3+
4+
This module provides standardized models for generation requests, responses,
5+
and timing information to ensure consistent data handling across different
6+
backend implementations.
7+
8+
Classes:
9+
GenerationRequest: Request model for generation operations with content,
10+
parameters, statistics, and constraints.
11+
GenerationResponse: Response model containing generation results, token
12+
counts, timing information, and error details.
13+
GenerationRequestTimings: Timing model for tracking request lifecycle
14+
events and performance metrics.
15+
"""
16+
17+
import uuid
18+
from typing import Any, Literal, Optional
19+
20+
from pydantic import Field
21+
22+
from guidellm.objects.pydantic import StandardBaseModel
23+
from guidellm.scheduler import RequestTimings
24+
25+
__all__ = [
26+
"GenerationRequest",
27+
"GenerationRequestTimings",
28+
"GenerationResponse",
29+
]
30+
31+
32+
class GenerationRequest(StandardBaseModel):
    """
    Request model for backend generation operations.

    Encapsulates all necessary information for performing text or chat completion
    requests through backend systems.
    """

    # Auto-generated UUID4 string so every request is individually traceable
    # even when callers do not supply an identifier.
    request_id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="The unique identifier for the request.",
    )
    request_type: Literal["text_completions", "chat_completions"] = Field(
        default="text_completions",
        description=(
            "The type of request (e.g., text, chat). "
            "If request_type='text_completions', resolved by backend.text_completions. "
            # Fixed typo: previously read "request_typ='chat_completions'".
            "If request_type='chat_completions', resolved by backend.chat_completions."
        ),
    )
    # Deliberately typed Any: the accepted shape depends on request_type and is
    # interpreted by the corresponding backend method (see description).
    content: Any = Field(
        description=(
            "The content for the request to send to the backend. "
            "For request_type='text_completions', this should be a string or list "
            "of strings which will be resolved by backend.text_completions(). "
            "For request_type='chat_completions', this should be a string, "
            "a list of (str, Dict[str, Union[str, Dict[str, str]]], Path, Image), "
            "or raw content which will be resolved by backend.chat_completions(). "
            "For raw content, set raw_content=True in the params field."
        )
    )
    params: dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Additional parameters passed as kwargs to the backend methods. "
            "For HTTP backends, these are included in the request body. "
            "Common parameters include max_tokens, temperature, and stream."
        ),
    )
    # Keys restricted via Literal so only known statistics can be recorded.
    stats: dict[Literal["prompt_tokens"], int] = Field(
        default_factory=dict,
        description=(
            "Request statistics including prompt token count. "
            "Used for tracking resource usage and performance analysis."
        ),
    )
    constraints: dict[Literal["output_tokens"], int] = Field(
        default_factory=dict,
        description=(
            "Request constraints such as maximum output tokens. "
            "Used to control backend generation behavior and resource limits."
        ),
    )
85+
86+
87+
class GenerationResponse(StandardBaseModel):
    """
    Response model for backend generation operations.

    Contains the results of a generation request including the generated content,
    token usage statistics, and iteration counts. Supports both complete
    responses and streaming delta updates.
    """

    # Identifier linking this response back to its originating GenerationRequest.
    request_id: str = Field(
        description="Unique identifier matching the original GenerationRequest."
    )
    request_args: dict[str, Any] = Field(
        description="Arguments that were passed to the backend for this request."
    )
    # `value` holds the full generated text for complete responses; `delta`
    # carries the incremental chunk for streaming updates. Both default to None.
    value: Optional[str] = Field(
        default=None,
        description="Complete generated text content. None for streaming responses.",
    )
    delta: Optional[str] = Field(
        default=None, description="Incremental text content for streaming responses."
    )
    iterations: int = Field(
        default=0, description="Number of generation iterations completed."
    )
    # request_* counts originate from the request side; response_* counts are the
    # values reported back by the backend (either may be absent, hence Optional).
    request_prompt_tokens: Optional[int] = Field(
        default=None, description="Token count from the original request prompt."
    )
    request_output_tokens: Optional[int] = Field(
        default=None,
        description="Expected output token count from the original request.",
    )
    response_prompt_tokens: Optional[int] = Field(
        default=None, description="Actual prompt token count reported by the backend."
    )
    response_output_tokens: Optional[int] = Field(
        default=None, description="Actual output token count reported by the backend."
    )
125+
126+
127+
class GenerationRequestTimings(RequestTimings):
    """
    Timing model for tracking generation request lifecycle events.

    Extends the base RequestTimings with generation-specific timing points
    including first and last iteration timestamps.
    """

    # Unix timestamps (per the field descriptions); None until the
    # corresponding iteration event has been recorded.
    first_iteration: Optional[float] = Field(
        default=None,
        description="Unix timestamp when the first generation iteration began.",
    )
    last_iteration: Optional[float] = Field(
        default=None,
        description="Unix timestamp when the last generation iteration completed.",
    )

0 commit comments

Comments
 (0)