11"""
22Response handlers for processing API responses from different generation backends.
33
4- This module provides a pluggable system for handling responses from various language
5- model backends, supporting both streaming and non-streaming responses. Each handler
6- implements the GenerationResponseHandler protocol to parse API responses, extract
7- usage metrics, and convert them into standardized GenerationResponse objects for the
8- benchmark system.
4+ Provides a pluggable system for handling responses from language model backends,
5+ supporting both streaming and non-streaming responses. Each handler implements the
6+ GenerationResponseHandler protocol to parse API responses, extract usage metrics,
7+ and convert them into standardized GenerationResponse objects.
98"""
109
1110from __future__ import annotations
2625
2726class GenerationResponseHandler (Protocol ):
2827 """
29- Protocol defining the interface for handling generation API responses.
28+ Protocol for handling generation API responses.
3029
31- Response handlers implement this protocol to process both streaming and
32- non-streaming responses from different backend APIs, converting them into
33- standardized GenerationResponse objects with consistent metrics extraction.
30+ Defines the interface for processing both streaming and non-streaming responses
31+ from backend APIs, converting them into standardized GenerationResponse objects
32+ with consistent metrics extraction.
3433 """
3534
3635 def compile_non_streaming (
@@ -39,7 +38,7 @@ def compile_non_streaming(
3938 """
4039 Process a complete non-streaming API response.
4140
42- :param request: The original generation request
41+ :param request: Original generation request
4342 :param response: Raw API response data from the backend
4443 :return: Standardized GenerationResponse with extracted metrics
4544 """
@@ -58,7 +57,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
5857 """
5958 Compile accumulated streaming data into a final response.
6059
61- :param request: The original generation request
60+ :param request: Original generation request
6261 :return: Standardized GenerationResponse with extracted metrics
6362 """
6463 ...
@@ -68,9 +67,9 @@ class GenerationResponseHandlerFactory(RegistryMixin[type[GenerationResponseHand
6867 """
6968 Factory for registering and creating response handlers by backend type.
7069
71- Provides a registry-based system for associating handler classes with specific
72- backend API types, enabling automatic selection of the appropriate handler
73- for processing responses from different generation services.
70+ Registry-based system for associating handler classes with specific backend API
71+ types, enabling automatic selection of the appropriate handler for processing
72+ responses from different generation services.
7473 """
7574
7675
@@ -79,9 +78,9 @@ class TextCompletionsResponseHandler(GenerationResponseHandler):
7978 """
8079 Response handler for OpenAI-style text completion endpoints.
8180
82- Processes responses from text completion APIs that return generated text
83- in the 'choices' array with 'text' fields. Handles both streaming and
84- non-streaming responses, extracting usage metrics for input and output tokens.
81+ Processes responses from text completion APIs that return generated text in the
82+ 'choices' array with 'text' fields. Handles both streaming and non-streaming
83+ responses, extracting usage metrics for input and output tokens.
8584
8685 Example:
8786 ::
@@ -105,7 +104,7 @@ def compile_non_streaming(
105104 """
106105 Process a complete text completion response.
107106
108- :param request: The original generation request
107+ :param request: Original generation request
109108 :param response: Complete API response containing choices and usage data
110109 :return: Standardized GenerationResponse with extracted text and metrics
111110 """
@@ -151,7 +150,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
151150 """
152151 Compile accumulated streaming text chunks into a final response.
153152
154- :param request: The original generation request
153+ :param request: Original generation request
155154 :return: Standardized GenerationResponse with concatenated text and metrics
156155 """
157156 input_metrics , output_metrics = self .extract_metrics (self .streaming_usage )
@@ -171,7 +170,7 @@ def extract_line_data(self, line: str) -> dict[str, Any] | None:
171170 Extract JSON data from a streaming response line.
172171
173172 :param line: Raw line from the streaming response
174- :return: Parsed JSON data as a dictionary, or None if line is invalid
173+ :return: Parsed JSON data as a dictionary, or None if line indicates completion
175174 """
176175 if line == "data: [DONE]" :
177176 return None
@@ -190,7 +189,7 @@ def extract_choices_and_usage(
190189 Extract choices and usage data from the API response.
191190
192191 :param response: Complete API response containing choices and usage data
193- :return: Tuple of (choices list, usage dictionary)
192+ :return: Tuple of choices list and usage dictionary
194193 """
195194 return response .get ("choices" , []), response .get ("usage" , {})
196195
@@ -201,7 +200,7 @@ def extract_metrics(
201200 Extract input and output usage metrics from API response usage data.
202201
203202 :param usage: Usage data dictionary from API response
204- :return: Tuple of (input_metrics, output_metrics) as UsageMetrics objects
203+ :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
205204 """
206205 if not usage :
207206 return UsageMetrics (), UsageMetrics ()
@@ -236,9 +235,9 @@ class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
236235 """
237236 Response handler for OpenAI-style chat completion endpoints.
238237
239- Extends TextCompletionsResponseHandler to handle chat completion responses
240- where generated text is nested within message objects in the choices array.
241- Processes both streaming and non-streaming chat completion responses.
238+ Extends TextCompletionsResponseHandler to handle chat completion responses where
239+ generated text is nested within message objects in the choices array. Processes
240+ both streaming and non-streaming chat completion responses.
242241 """
243242
244243 def compile_non_streaming (
@@ -247,10 +246,10 @@ def compile_non_streaming(
247246 """
248247 Process a complete chat completion response.
249248
250- Extracts content from the message object within choices, handling the
251- nested structure specific to chat completion endpoints.
249+ Extracts content from the message object within choices, handling the nested
250+ structure specific to chat completion endpoints.
252251
253- :param request: The original generation request
252+ :param request: Original generation request
254253 :param response: Complete API response containing choices and usage data
255254 :return: Standardized GenerationResponse with extracted content and metrics
256255 """
@@ -271,8 +270,8 @@ def add_streaming_line(self, line: str) -> int | None:
271270 """
272271 Process a single line from a chat completion streaming response.
273272
274- Handles the chat completion specific delta structure where content
275- is nested within delta objects in the streaming response chunks.
273+ Handles the chat completion specific delta structure where content is nested
274+ within delta objects in the streaming response chunks.
276275
277276 :param line: Raw SSE line from the streaming response
278277 :return: 1 if content was extracted, 0 if line ignored, None if done
@@ -296,7 +295,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
296295 """
297296 Compile accumulated streaming chat completion content into a final response.
298297
299- :param request: The original generation request
298+ :param request: Original generation request
300299 :return: Standardized GenerationResponse with concatenated content and metrics
301300 """
302301 input_metrics , output_metrics = self .extract_metrics (self .streaming_usage )
@@ -349,7 +348,7 @@ def compile_non_streaming(
349348 Extracts transcribed or translated text and audio-specific usage metrics
350349 including processing duration and token counts for audio content.
351350
352- :param request: The original generation request
351+ :param request: Original generation request
353352 :param response: Complete API response containing text and usage data
354353 :return: Standardized GenerationResponse with extracted text and metrics
355354 """
@@ -412,7 +411,7 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
412411 """
413412 Compile accumulated streaming audio text into a final response.
414413
415- :param request: The original generation request
414+ :param request: Original generation request
416415 :return: Standardized GenerationResponse with concatenated text and metrics
417416 """
418417 input_metrics , output_metrics = self .extract_metrics (self .streaming_usage )
@@ -437,7 +436,7 @@ def extract_metrics(
437436 in addition to standard text token counts.
438437
439438 :param usage: Usage data dictionary from audio API response
440- :return: Tuple of (input_metrics, output_metrics) as UsageMetrics objects
439+ :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
441440 """
442441 if not usage :
443442 return UsageMetrics (), UsageMetrics ()
0 commit comments