 implementations.
 """

-import uuid
-from typing import Any, Literal, Optional
+from __future__ import annotations
+
+from typing import Literal

 from pydantic import Field

+from guidellm.data import (
+    GenerationRequest,
+    GenerationRequestArguments,
+    GenerationRequestTimings,
+)
 from guidellm.scheduler import (
-    MeasuredRequestTimings,
     SchedulerMessagingPydanticRegistry,
 )
 from guidellm.utils import StandardBaseModel

 __all__ = [
     "GenerationRequest",
+    "GenerationRequestArguments",
     "GenerationRequestTimings",
     "GenerationResponse",
+    "GenerationTokenStats",
 ]


 @SchedulerMessagingPydanticRegistry.register()
-class GenerationRequest(StandardBaseModel):
-    """Request model for backend generation operations."""
+class GenerationTokenStats(StandardBaseModel):
+    """Token statistics for generation requests and responses."""

-    request_id: str = Field(
-        default_factory=lambda: str(uuid.uuid4()),
-        description="Unique identifier for the request.",
-    )
-    request_type: Literal["text_completions", "chat_completions"] = Field(
-        default="text_completions",
-        description=(
-            "Type of request. 'text_completions' uses backend.text_completions(), "
-            "'chat_completions' uses backend.chat_completions()."
-        ),
-    )
-    content: Any = Field(
-        description=(
-            "Request content. For text_completions: string or list of strings. "
-            "For chat_completions: string, list of messages, or raw content "
-            "(set raw_content=True in params)."
-        )
-    )
-    params: dict[str, Any] = Field(
-        default_factory=dict,
-        description=(
-            "Additional parameters passed to backend methods. "
-            "Common: max_tokens, temperature, stream."
-        ),
+    request: int | None = Field(
+        default=None, description="Number of tokens in the original request."
     )
-    stats: dict[Literal["prompt_tokens"], int] = Field(
-        default_factory=dict,
-        description="Request statistics including prompt token count.",
-    )
-    constraints: dict[Literal["output_tokens"], int] = Field(
-        default_factory=dict,
-        description="Request constraints such as maximum output tokens.",
+    response: int | None = Field(
+        default=None, description="Number of tokens in the generated response."
     )

+    def value(
+        self, preference: Literal["request", "response"] | None = None
+    ) -> int | None:
+        if preference == "request":
+            return self.request
+        if preference == "response":
+            return self.response
+        return self.response if self.response is not None else self.request
+

 @SchedulerMessagingPydanticRegistry.register()
 class GenerationResponse(StandardBaseModel):
@@ -70,87 +59,32 @@ class GenerationResponse(StandardBaseModel):
     request_id: str = Field(
         description="Unique identifier matching the original GenerationRequest."
     )
-    request_args: dict[str, Any] = Field(
+    request_args: GenerationRequestArguments = Field(
         description="Arguments passed to the backend for this request."
     )
-    value: Optional[str] = Field(
+    text: str | None = Field(
         default=None,
-        description="Complete generated text content. None for streaming responses.",
-    )
-    delta: Optional[str] = Field(
-        default=None, description="Incremental text content for streaming responses."
+        description="The generated response text.",
     )
     iterations: int = Field(
         default=0, description="Number of generation iterations completed."
     )
-    request_prompt_tokens: Optional[int] = Field(
-        default=None, description="Token count from the original request prompt."
-    )
-    request_output_tokens: Optional[int] = Field(
-        default=None,
-        description="Expected output token count from the original request.",
-    )
-    response_prompt_tokens: Optional[int] = Field(
-        default=None, description="Actual prompt token count reported by the backend."
+
+    prompt_stats: GenerationTokenStats = Field(
+        default_factory=GenerationTokenStats,
+        description="Token statistics from the prompt.",
     )
-    response_output_tokens: Optional[int] = Field(
-        default=None, description="Actual output token count reported by the backend."
+    output_stats: GenerationTokenStats = Field(
+        default_factory=GenerationTokenStats,
+        description="Token statistics from the generated output.",
     )

-    @property
-    def prompt_tokens(self) -> Optional[int]:
-        """
-        :return: The number of prompt tokens used in the request
-            (response_prompt_tokens if available, otherwise request_prompt_tokens).
-        """
-        return self.response_prompt_tokens or self.request_prompt_tokens
-
-    @property
-    def output_tokens(self) -> Optional[int]:
-        """
-        :return: The number of output tokens generated in the response
-            (response_output_tokens if available, otherwise request_output_tokens).
-        """
-        return self.response_output_tokens or self.request_output_tokens
-
-    @property
-    def total_tokens(self) -> Optional[int]:
-        """
-        :return: The total number of tokens used in the request and response.
-            Sum of prompt_tokens and output_tokens.
-        """
-        if self.prompt_tokens is None or self.output_tokens is None:
-            return None
-        return self.prompt_tokens + self.output_tokens
-
-    def preferred_prompt_tokens(
-        self, preferred_source: Literal["request", "response"]
-    ) -> Optional[int]:
-        if preferred_source == "request":
-            return self.request_prompt_tokens or self.response_prompt_tokens
-        else:
-            return self.response_prompt_tokens or self.request_prompt_tokens
-
-    def preferred_output_tokens(
-        self, preferred_source: Literal["request", "response"]
-    ) -> Optional[int]:
-        if preferred_source == "request":
-            return self.request_output_tokens or self.response_output_tokens
-        else:
-            return self.response_output_tokens or self.request_output_tokens
-
-
-@SchedulerMessagingPydanticRegistry.register()
-@MeasuredRequestTimings.register("generation_request_timings")
-class GenerationRequestTimings(MeasuredRequestTimings):
-    """Timing model for tracking generation request lifecycle events."""
+    def total_tokens(
+        self, preference: Literal["request", "response"] | None = None
+    ) -> int | None:
+        prompt_tokens = self.prompt_stats.value(preference=preference)
+        output_tokens = self.output_stats.value(preference=preference)

-    timings_type: Literal["generation_request_timings"] = "generation_request_timings"
-    first_iteration: Optional[float] = Field(
-        default=None,
-        description="Unix timestamp when the first generation iteration began.",
-    )
-    last_iteration: Optional[float] = Field(
-        default=None,
-        description="Unix timestamp when the last generation iteration completed.",
-    )
+        if prompt_tokens is None and output_tokens is None:
+            return None
+        return (prompt_tokens or 0) + (output_tokens or 0)
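For reviewers, a minimal sketch of how the new token accounting resolves counts. This is illustrative only: the import path and the numeric values are assumptions, not part of this diff.

# Illustrative sketch; import path assumed, values made up for the example.
from guidellm.backends.objects import GenerationTokenStats

stats = GenerationTokenStats(request=128, response=125)
assert stats.value() == 125                       # default: prefer the backend-reported response count
assert stats.value(preference="request") == 128   # explicit source override
assert GenerationTokenStats(request=128).value() == 128  # falls back to request when response is None

Note the behavior change this encodes: GenerationResponse.total_tokens() applies the same preference to both prompt_stats and output_stats, and unlike the removed total_tokens property it still returns a partial sum when only one side is known, returning None only when both resolve to None.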