 from typing import Any, Protocol, cast

 from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
+from guidellm.schemas.request import GenerationRequestArguments
 from guidellm.utils import RegistryMixin, json

 __all__ = [
@@ -33,7 +34,10 @@ class GenerationResponseHandler(Protocol):
     """

     def compile_non_streaming(
-        self, request: GenerationRequest, response: Any
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: Any,
     ) -> GenerationResponse:
         """
         Process a complete non-streaming API response.
@@ -53,7 +57,9 @@ def add_streaming_line(self, line: str) -> int | None:
         """
         ...

-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming data into a final response.

@@ -127,7 +133,10 @@ def __init__(self):
         self.streaming_response_id: str | None = None

     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
     ) -> GenerationResponse:
         """
         Process a complete text completion response.
@@ -143,9 +152,7 @@ def compile_non_streaming(

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -181,7 +188,9 @@ def add_streaming_line(self, line: str) -> int | None:

         return 1 if updated else 0

-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming text chunks into a final response.

@@ -193,9 +202,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -290,7 +297,10 @@ class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
     """

     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
     ) -> GenerationResponse:
         """
         Process a complete chat completion response.
@@ -309,9 +319,7 @@ def compile_non_streaming(

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -347,7 +355,9 @@ def add_streaming_line(self, line: str) -> int | None:

         return 1 if updated else 0

-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming chat completion content into a final response.

@@ -359,9 +369,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -399,7 +407,10 @@ def __init__(self):
         self.streaming_response_id: str | None = None

     def compile_non_streaming(
-        self, request: GenerationRequest, response: dict
+        self,
+        request: GenerationRequest,
+        arguments: GenerationRequestArguments,
+        response: dict,
     ) -> GenerationResponse:
         """
         Process a complete audio transcription or translation response.
@@ -417,9 +428,7 @@ def compile_non_streaming(

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=response.get("id"),  # use vLLM ID if available
             text=text,
             input_metrics=input_metrics,
@@ -457,7 +466,9 @@ def add_streaming_line(self, line: str) -> int | None:

         return 1 if updated else 0

-    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+    def compile_streaming(
+        self, request: GenerationRequest, arguments: GenerationRequestArguments
+    ) -> GenerationResponse:
         """
         Compile accumulated streaming audio text into a final response.

@@ -469,9 +480,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:

         return GenerationResponse(
             request_id=request.request_id,
-            request_args=str(
-                request.arguments.model_dump() if request.arguments else None
-            ),
+            request_args=arguments.model_dump_json(),
             response_id=self.streaming_response_id,
             text=text,
             input_metrics=input_metrics,
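The change threads the resolved `GenerationRequestArguments` through the handler protocol instead of having each handler reach back into `request.arguments`, and `request_args` is now recorded as proper JSON via `model_dump_json()` rather than a `str()` of a dict. Below is a minimal sketch of a caller under the updated contract, assuming an httpx-based backend and that `GenerationRequestArguments` is a Pydantic model (which the `model_dump_json()` call above suggests); the payload construction and endpoint handling are illustrative, not the project's actual backend code.

```python
import httpx

from guidellm.schemas import GenerationRequest, GenerationResponse
from guidellm.schemas.request import GenerationRequestArguments


async def resolve_non_streaming(
    client: httpx.AsyncClient,
    handler,  # any object satisfying GenerationResponseHandler
    request: GenerationRequest,
    arguments: GenerationRequestArguments,
    url: str,
) -> GenerationResponse:
    # Hypothetical caller: the backend resolves the request arguments once,
    # uses them to build the HTTP body (the exact mapping is an assumption),
    # and passes the same arguments object to the handler so it can embed
    # them in GenerationResponse.request_args via model_dump_json().
    payload = arguments.model_dump(exclude_none=True)  # assumed body mapping
    http_response = await client.post(url, json=payload)
    http_response.raise_for_status()
    return handler.compile_non_streaming(request, arguments, http_response.json())
```

For streaming requests, the same `arguments` object would be handed to `compile_streaming(request, arguments)` after the streamed lines have been accumulated via `add_streaming_line()`, so streaming and non-streaming responses record identical `request_args`.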