Commit 7e546e2

Migrate remaining code off of request.arguments

Signed-off-by: Samuel Monson <[email protected]>
1 parent: f3e6c4a

File tree: 4 files changed, +44 −33 lines

src/guidellm/backends/openai.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -289,7 +289,10 @@ async def resolve(  # type: ignore[override]
         request_info.timings.request_end = time.time()
         response.raise_for_status()
         data = response.json()
-        yield response_handler.compile_non_streaming(request, data), request_info
+        yield (
+            response_handler.compile_non_streaming(request, arguments, data),
+            request_info,
+        )
         return
 
         try:
@@ -328,10 +331,10 @@ async def resolve(  # type: ignore[override]
             request_info.timings.token_iterations += iterations
 
             request_info.timings.request_end = time.time()
-            yield response_handler.compile_streaming(request), request_info
+            yield response_handler.compile_streaming(request, arguments), request_info
         except asyncio.CancelledError as err:
             # Yield current result to store iterative results before propagating
-            yield response_handler.compile_streaming(request), request_info
+            yield response_handler.compile_streaming(request, arguments), request_info
             raise err
 
     def _resolve_validate_kwargs(
```

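The call sites now thread the resolved `arguments` through to the handler, and the streaming path yields a partial result on cancellation before re-raising. Below is a minimal sketch of that yield-on-cancel pattern; the driver function, its parameters, and the line source are hypothetical stand-ins, with only the `compile_streaming(request, arguments)` signature taken from this commit:

```python
import asyncio
from collections.abc import AsyncIterator


async def stream_with_partial_results(
    handler, request, arguments, request_info, lines: AsyncIterator[str]
):
    """Hypothetical driver mirroring the cancellation handling in resolve()."""
    try:
        async for line in lines:
            handler.add_streaming_line(line)
        # Normal completion: compile everything accumulated so far.
        yield handler.compile_streaming(request, arguments), request_info
    except asyncio.CancelledError:
        # Yield the partial result first so iterative work is not lost,
        # then propagate the cancellation to the caller.
        yield handler.compile_streaming(request, arguments), request_info
        raise
```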
src/guidellm/backends/response_handlers.py

Lines changed: 35 additions & 26 deletions
```diff
@@ -12,6 +12,7 @@
 from typing import Any, Protocol, cast
 
 from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
+from guidellm.schemas.request import GenerationRequestArguments
 from guidellm.utils import RegistryMixin, json
 
 __all__ = [
@@ -33,7 +34,10 @@ class GenerationResponseHandler(Protocol):
     """
 
     def compile_non_streaming(
-        self, request: GenerationRequest, response: Any
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: Any,
     ) -> GenerationResponse:
         """
         Process a complete non-streaming API response.
@@ -53,7 +57,9 @@ def add_streaming_line(self, line: str) -> int | None:
         """
         ...
 
-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming data into a final response.
 
@@ -127,7 +133,10 @@ def __init__(self):
         self.streaming_response_id: str | None = None
 
     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
     ) -> GenerationResponse:
         """
         Process a complete text completion response.
@@ -143,9 +152,7 @@ def compile_non_streaming(
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -181,7 +188,9 @@ def add_streaming_line(self, line: str) -> int | None:
 
         return 1 if updated else 0
 
-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming text chunks into a final response.
 
@@ -193,9 +202,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -290,7 +297,10 @@ class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
     """
 
     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
    ) -> GenerationResponse:
         """
         Process a complete chat completion response.
@@ -309,9 +319,7 @@ def compile_non_streaming(
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -347,7 +355,9 @@ def add_streaming_line(self, line: str) -> int | None:
 
         return 1 if updated else 0
 
-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming chat completion content into a final response.
 
@@ -359,9 +369,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -399,7 +407,10 @@ def __init__(self):
         self.streaming_response_id: str | None = None
 
     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
     ) -> GenerationResponse:
         """
         Process a complete audio transcription or translation response.
@@ -417,9 +428,7 @@ def compile_non_streaming(
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -457,7 +466,9 @@ def add_streaming_line(self, line: str) -> int | None:
 
         return 1 if updated else 0
 
-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming audio text into a final response.
 
@@ -469,9 +480,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
 
         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,
             text=text,
             input_metrics=input_metrics,
```

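For any handler implementing this protocol, the migration means accepting the new `arguments` parameter in both compile methods and serializing it directly instead of reaching back into `request.arguments`. A rough sketch of a conforming handler follows; the class name and buffering logic are illustrative only, and it assumes `GenerationResponse` provides defaults for the fields omitted here:

```python
from typing import Any

from guidellm.schemas import GenerationRequest, GenerationResponse
from guidellm.schemas.request import GenerationRequestArguments


class EchoResponseHandler:
    """Illustrative handler conforming to the updated GenerationResponseHandler."""

    def __init__(self) -> None:
        self.buffer: list[str] = []

    def compile_non_streaming(
        self,
        request: GenerationRequest,
        arguments: GenerationRequestArguments,
        response: Any,
    ) -> GenerationResponse:
        return GenerationResponse(
            request_id=request.request_id,
            # Serialize the arguments passed in; request.arguments is no
            # longer consulted here.
            request_args=arguments.model_dump_json(),
            text=str(response),
        )

    def add_streaming_line(self, line: str) -> int | None:
        # Count every line as one update for this toy handler.
        self.buffer.append(line)
        return 1

    def compile_streaming(
        self, request: GenerationRequest, arguments: GenerationRequestArguments
    ) -> GenerationResponse:
        return GenerationResponse(
            request_id=request.request_id,
            request_args=arguments.model_dump_json(),
            text="".join(self.buffer),
        )
```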
src/guidellm/benchmark/schemas/generative/accumulator.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -722,8 +722,9 @@ def compile_stats(
         )
 
         if response is None:
+            # FIXME: request_args is wrong
             response = GenerationResponse(
-                request_id=info.request_id, request_args=str(first_request.arguments)
+                request_id=info.request_id, request_args=str(first_request.columns)
             )
 
         return response.compile_stats(
```

src/guidellm/schemas/response.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -114,9 +114,7 @@ def compile_stats(
             request_id=self.request_id,
             response_id=self.response_id,
             request_type=request.request_type,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else {}
-            ),
+            request_args=self.request_args,
             output=self.text,
             info=info,
             input_metrics=UsageMetrics(**input_metrics_dict),
```

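The net effect in `compile_stats` is that the arguments string captured when the response was built is reused verbatim, rather than re-dumping `request.arguments` (the attribute this commit is retiring). A generic, self-contained illustration of that serialize-once pattern, using plain pydantic models rather than the guidellm classes:

```python
from pydantic import BaseModel


class Arguments(BaseModel):
    model: str
    stream: bool = False


class Response(BaseModel):
    request_id: str
    request_args: str  # JSON string captured once, at construction time

    def compile_stats(self) -> dict:
        # Reuse the stored serialization instead of re-reading the request.
        return {"request_id": self.request_id, "request_args": self.request_args}


args = Arguments(model="example-model", stream=True)
resp = Response(request_id="req-1", request_args=args.model_dump_json())
print(resp.compile_stats())
```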