Commit 6ea2396

Refactor benchmark object, benchmarker, and introduce aggregators as reducer patterns for working with the new scheduler refactor
1 parent 8098f33 commit 6ea2396
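
The commit message frames the new aggregators as reducers: each completed request from the scheduler is folded into a running state instead of being collected and post-processed in bulk at the end of a run. The aggregator modules themselves are not part of the excerpt below, so the following is only a minimal sketch of that reducer shape, with every name hypothetical:

# Hypothetical sketch of an "aggregator as reducer"; the real classes and
# signatures live in files that are not shown in this excerpt.
from dataclasses import dataclass


@dataclass
class RunningStats:
    """Minimal running state threaded through the reduction."""

    requests: int = 0
    output_tokens: int = 0


def aggregate(state: RunningStats, response) -> RunningStats:
    # Reducer step: fold one completed request into the accumulated state
    # rather than keeping every response around for a final pass.
    state.requests += 1
    state.output_tokens += response.output_tokens or 0
    return state


# The benchmarker would drive something like:
#     functools.reduce(aggregate, completed_responses, RunningStats())

A reducer of this shape composes naturally with a scheduler that streams results as they complete.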

File tree

17 files changed: +909 -1519 lines


src/guidellm/backend/backend.py

Lines changed: 2 additions & 2 deletions
@@ -109,8 +109,8 @@ def requests_limit(self) -> Optional[int]:
     @abstractmethod
     def info(self) -> dict[str, Any]:
         """
-        :return: Backend metadata including model information, endpoints, and
-            configuration data for reporting and diagnostics.
+        :return: Backend metadata including model, any initialization, and
+            configuration information.
         """
         ...

src/guidellm/backend/objects.py

Lines changed: 42 additions & 0 deletions
@@ -92,6 +92,48 @@ class GenerationResponse(StandardBaseModel):
         default=None, description="Actual output token count reported by the backend."
     )

+    @property
+    def prompt_tokens(self) -> Optional[int]:
+        """
+        :return: The number of prompt tokens used in the request
+            (response_prompt_tokens if available, otherwise request_prompt_tokens).
+        """
+        return self.response_prompt_tokens or self.request_prompt_tokens
+
+    @property
+    def output_tokens(self) -> Optional[int]:
+        """
+        :return: The number of output tokens generated in the response
+            (response_output_tokens if available, otherwise request_output_tokens).
+        """
+        return self.response_output_tokens or self.request_output_tokens
+
+    @property
+    def total_tokens(self) -> Optional[int]:
+        """
+        :return: The total number of tokens used in the request and response.
+            Sum of prompt_tokens and output_tokens.
+        """
+        if self.prompt_tokens is None or self.output_tokens is None:
+            return None
+        return self.prompt_tokens + self.output_tokens
+
+    def preferred_prompt_tokens(
+        self, preferred_source: Literal["request", "response"]
+    ) -> Optional[int]:
+        if preferred_source == "request":
+            return self.request_prompt_tokens or self.response_prompt_tokens
+        else:
+            return self.response_prompt_tokens or self.request_prompt_tokens
+
+    def preferred_output_tokens(
+        self, preferred_source: Literal["request", "response"]
+    ) -> Optional[int]:
+        if preferred_source == "request":
+            return self.request_output_tokens or self.response_output_tokens
+        else:
+            return self.response_output_tokens or self.request_output_tokens
+

 class GenerationRequestTimings(RequestTimings):
     """Timing model for tracking generation request lifecycle events."""

src/guidellm/benchmark/__init__.py

Lines changed: 4 additions & 4 deletions
@@ -3,12 +3,12 @@
     Benchmark,
     BenchmarkArgs,
     BenchmarkMetrics,
-    BenchmarkRunStats,
+    BenchmarkSchedulerStats,
     BenchmarkT,
     GenerativeBenchmark,
     GenerativeMetrics,
+    GenerativeRequestStats,
     GenerativeTextErrorStats,
-    GenerativeTextResponseStats,
     StatusBreakdown,
 )
 from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
@@ -38,7 +38,7 @@
     "BenchmarkAggregator",
     "BenchmarkArgs",
     "BenchmarkMetrics",
-    "BenchmarkRunStats",
+    "BenchmarkSchedulerStats",
     "BenchmarkT",
     "Benchmarker",
     "BenchmarkerProgressDisplay",
@@ -51,10 +51,10 @@
     "GenerativeBenchmarksConsole",
     "GenerativeBenchmarksReport",
     "GenerativeMetrics",
+    "GenerativeRequestStats",
     "GenerativeTextBenchmarkerProgressDisplay",
     "GenerativeTextBenchmarkerTaskProgressState",
     "GenerativeTextErrorStats",
-    "GenerativeTextResponseStats",
     "Profile",
     "ProfileType",
     "StatusBreakdown",