Skip to content

Commit 3c0bc9e

Browse files
authored
fix: gracefully handle erroneous SSE responses (#440)
* gracefully handle erroneous responses
* remove unused import
* display error at a central location
1 parent bfcca04 commit 3c0bc9e

File tree

5 files changed

+124
-43
lines changed

5 files changed

+124
-43
lines changed

genai-perf/genai_perf/logging.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def emit(self, record):
5050
print(message)
5151

5252

53-
def init_logging(log_level: Optional[str] = None) -> None:
53+
def init_logging(log_level: Optional[str] = "INFO") -> None:
5454
"""Initialize logging configuration for the genai_perf package.
5555
5656
Args:
@@ -84,7 +84,7 @@ def init_logging(log_level: Optional[str] = None) -> None:
8484
"loggers": {
8585
"": { # root logger
8686
"handlers": ["console"],
87-
"level": "WARNING",
87+
"level": log_level,
8888
"propagate": False,
8989
},
9090
"__main__": { # if __name__ == '__main__'

genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py

Lines changed: 55 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from genai_perf.exceptions import GenAIPerfException
3737
from genai_perf.logging import logging
3838
from genai_perf.metrics import LLMMetrics, Statistics
39+
from genai_perf.profile_data_parser.parser_result import ParserResult
3940
from genai_perf.profile_data_parser.profile_data_parser import (
4041
ProfileDataParser,
4142
ResponseFormat,
@@ -110,6 +111,7 @@ def _parse_profile_data(self, data: dict) -> None:
110111

111112
def _parse_requests(self, requests: dict) -> LLMMetrics:
112113
"""Parse each requests in profile export data to extract key metrics."""
114+
parser_result = ParserResult()
113115
min_req_timestamp, max_res_timestamp = float("inf"), 0
114116
request_latencies: List[int] = []
115117
time_to_first_tokens: List[int] = []
@@ -134,7 +136,8 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
134136
res_timestamps = request["response_timestamps"]
135137
res_outputs = request["response_outputs"]
136138

137-
self._preprocess_response(res_timestamps, res_outputs)
139+
self._preprocess_response(res_timestamps, res_outputs, parser_result)
140+
parser_result.success += len(res_outputs)
138141

139142
# Skip requests with empty response. This happens sometimes when the
140143
# model returns a single response with empty string.
@@ -253,6 +256,9 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
253256
goodput_val = self._calculate_goodput(benchmark_duration, llm_metrics)
254257
llm_metrics.request_goodputs = goodput_val
255258

259+
# Report parsing results
260+
logger.info(parser_result.get_summary())
261+
256262
return llm_metrics
257263

258264
def _calculate_throughput_metrics(
@@ -287,7 +293,10 @@ def _pairwise(self, iterable):
287293
return zip(iterable, iterable[1:])
288294

289295
def _preprocess_response(
290-
self, res_timestamps: List[int], res_outputs: List[Dict[str, str]]
296+
self,
297+
res_timestamps: List[int],
298+
res_outputs: List[Dict[str, str]],
299+
parser_result: ParserResult,
291300
) -> None:
292301
"""Helper function to preprocess responses of a request."""
293302
if (
@@ -330,37 +339,54 @@ def _preprocess_response(
330339
# Check if any error event occurred.
331340
for r in responses:
332341
if sse_error_occurred(r):
333-
raise GenAIPerfException(
342+
logger.error(
334343
f"Detected an error event in the SSE response: {r}"
335344
)
336-
337-
if len(responses) > 1:
338-
data = load_json_str(remove_sse_prefix(responses[0]))
339-
if self._response_format == ResponseFormat.TRITON_GENERATE:
340-
merged_text = "".join(
341-
[self._extract_generate_text_output(r) for r in responses]
342-
)
343-
data["text_output"] = merged_text
344-
elif self._response_format == ResponseFormat.HUGGINGFACE_GENERATE:
345-
merged_text = "".join(
346-
[
347-
self._extract_huggingface_generate_text_output(r)
348-
for r in responses
349-
]
350-
)
351-
if isinstance(data, list) and len(data) > 0:
352-
data[0]["generated_text"] = merged_text # type: ignore
353-
else:
354-
merged_text = "".join(
355-
[self._extract_text_output(r) for r in responses]
356-
)
357-
if self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
358-
data["choices"][0]["text"] = merged_text
345+
res_outputs[i]["response"] = ""
346+
parser_result.failed += 1
347+
348+
try:
349+
if len(responses) > 1:
350+
data = load_json_str(remove_sse_prefix(responses[0]))
351+
if self._response_format == ResponseFormat.TRITON_GENERATE:
352+
merged_text = "".join(
353+
[
354+
self._extract_generate_text_output(r)
355+
for r in responses
356+
]
357+
)
358+
data["text_output"] = merged_text
359+
elif (
360+
self._response_format == ResponseFormat.HUGGINGFACE_GENERATE
361+
):
362+
merged_text = "".join(
363+
[
364+
self._extract_huggingface_generate_text_output(r)
365+
for r in responses
366+
]
367+
)
368+
if isinstance(data, list) and len(data) > 0:
369+
data[0]["generated_text"] = merged_text # type: ignore
359370
else:
360-
data["choices"][0]["delta"]["content"] = merged_text
361-
res_outputs[i] = {"response": orjson.dumps(data).decode("utf-8")}
362-
elif self._is_empty_response(responses[0]):
371+
merged_text = "".join(
372+
[self._extract_text_output(r) for r in responses]
373+
)
374+
if (
375+
self._response_format
376+
== ResponseFormat.OPENAI_COMPLETIONS
377+
):
378+
data["choices"][0]["text"] = merged_text
379+
else:
380+
data["choices"][0]["delta"]["content"] = merged_text
381+
res_outputs[i] = {
382+
"response": orjson.dumps(data).decode("utf-8")
383+
}
384+
elif self._is_empty_response(responses[0]):
385+
res_outputs[i]["response"] = ""
386+
except Exception as e:
387+
logger.error(f"Error parsing a response: {e}")
363388
res_outputs[i]["response"] = ""
389+
parser_result.failed += 1
364390

365391
# Remove responses without any content
366392
indices_to_remove = []
genai-perf/genai_perf/profile_data_parser/parser_result.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
from dataclasses import dataclass
28+
29+
30+
@dataclass
class ParserResult:
    """Tally of how many profile-data responses were parsed successfully.

    Both counters start at zero; callers increment them while parsing and
    then call ``get_summary`` to obtain a printable report.
    """

    # Number of responses counted as parsed successfully.
    success: int = 0
    # Number of responses counted as failed.
    failed: int = 0

    def get_summary(self) -> str:
        """Return a one-line, human-readable summary of the counters.

        The success rate is a percentage with one decimal place. When no
        responses have been counted, the rate is reported as 0.0% instead of
        dividing by zero.
        """
        total = self.success + self.failed
        success_rate = self.success / total * 100 if total > 0 else 0.0
        # Adjacent f-strings keep the emitted text identical to the original
        # single-line message while staying within a readable line length.
        return (
            f"Parsed {total:,} responses: {self.success:,} successful "
            f"({success_rate:.1f}%), {self.failed:,} failed"
        )

genai-perf/genai_perf/utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,13 @@ def load_json_str(json_str: str, func: Callable = lambda x: x) -> Dict[str, Any]
101101
# notably being stricter on UTF-8 conformance.
102102
# Refer to https://github.com/ijl/orjson?tab=readme-ov-file#str for details.
103103
return func(orjson.loads(json_str))
104-
except orjson.JSONDecodeError:
104+
except orjson.JSONDecodeError as e:
105105
snippet = json_str[:200] + ("..." if len(json_str) > 200 else "")
106-
logger.error("Failed to parse JSON string: '%s'", snippet)
107-
raise
106+
raise orjson.JSONDecodeError(
107+
f"Failed to parse JSON string: '{snippet}'",
108+
json_str,
109+
e.pos,
110+
)
108111

109112

110113
def remove_file(file: Path) -> None:

genai-perf/tests/test_data_parser/test_llm_profile_data_parser.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from genai_perf.metrics import LLMMetrics
3535
from genai_perf.metrics.statistics import Statistics
3636
from genai_perf.profile_data_parser import LLMProfileDataParser
37+
from genai_perf.profile_data_parser.parser_result import ParserResult
3738
from genai_perf.profile_data_parser.profile_data_parser import ResponseFormat
3839
from genai_perf.tokenizer import get_tokenizer
3940
from tests.test_utils import check_statistics, ns_to_sec
@@ -1018,8 +1019,11 @@ def test_merged_sse_responses(
10181019
tokenizer=tokenizer,
10191020
)
10201021

1022+
parser_result = ParserResult()
10211023
res_timestamps = [i for i in range(len(res_outputs))]
1022-
pd._preprocess_response(res_timestamps, res_outputs)
1024+
pd._preprocess_response(res_timestamps, res_outputs, parser_result)
1025+
1026+
assert parser_result.failed == 0
10231027
assert res_outputs[0]["response"] == expected_response
10241028

10251029
@pytest.mark.parametrize(
@@ -1112,8 +1116,10 @@ def test_splintered_sse_responses(
11121116
tokenizer=tokenizer,
11131117
)
11141118

1119+
parser_result = ParserResult()
11151120
res_timestamps = [i for i in range(len(res_outputs))]
1116-
pd._preprocess_response(res_timestamps, res_outputs)
1121+
pd._preprocess_response(res_timestamps, res_outputs, parser_result)
1122+
assert parser_result.failed == 0
11171123

11181124
assert len(res_outputs) == len(expected_responses)
11191125
for out, expected_response in zip(res_outputs, expected_responses):
@@ -1164,7 +1170,9 @@ def test_handle_non_data_sse_fields(self, mock_json) -> None:
11641170
tokenizer=tokenizer,
11651171
)
11661172

1167-
pd._preprocess_response(res_timestamps, res_outputs)
1173+
parser_result = ParserResult()
1174+
pd._preprocess_response(res_timestamps, res_outputs, parser_result)
1175+
assert parser_result.failed == 0
11681176

11691177
assert len(res_outputs) == 2 and len(res_timestamps) == 2
11701178
assert res_outputs[0]["response"] == expected_responses[0]
@@ -1208,12 +1216,11 @@ def test_handle_sse_error(self, mock_json, res_outputs) -> None:
12081216
tokenizer=tokenizer,
12091217
)
12101218

1211-
with pytest.raises(GenAIPerfException) as excinfo:
1212-
res_timestamps = [i for i in range(len(res_outputs))]
1213-
pd._preprocess_response(res_timestamps, res_outputs)
1219+
parser_result = ParserResult()
1220+
res_timestamps = [i for i in range(len(res_outputs))]
1221+
pd._preprocess_response(res_timestamps, res_outputs, parser_result)
12141222

1215-
expected_error_msg = "Detected an error event in the SSE response: event: error: some error occurred."
1216-
assert str(excinfo.value) == expected_error_msg
1223+
assert parser_result.failed == 1
12171224

12181225
@patch(
12191226
"genai_perf.profile_data_parser.profile_data_parser.load_json",
@@ -1239,7 +1246,10 @@ def test_non_sse_response(self, mock_json) -> None:
12391246
tokenizer=tokenizer,
12401247
)
12411248

1242-
pd._preprocess_response(res_timestamps, res_outputs)
1249+
parser_result = ParserResult()
1250+
pd._preprocess_response(res_timestamps, res_outputs, parser_result)
1251+
1252+
assert parser_result.failed == 0
12431253
assert res_outputs[0]["response"] == expected_response
12441254

12451255
###############################

0 commit comments

Comments
 (0)