fix: gracefully handle erroneous SSE responses (#440)

nv-hwoo · web-flow · commit 3c0bc9efa184 · 2025-08-29T12:13:15.000-07:00
* gracefully handle erroneous responses

* remove unused import

* display error at a central location
diff --git a/genai-perf/genai_perf/logging.py b/genai-perf/genai_perf/logging.py
@@ -50,7 +50,7 @@ def emit(self, record):
         print(message)
 
 
-def init_logging(log_level: Optional[str] = None) -> None:
+def init_logging(log_level: Optional[str] = "INFO") -> None:
     """Initialize logging configuration for the genai_perf package.
 
     Args:
@@ -84,7 +84,7 @@ def init_logging(log_level: Optional[str] = None) -> None:
         "loggers": {
             "": {  # root logger
                 "handlers": ["console"],
-                "level": "WARNING",
+                "level": log_level,
                 "propagate": False,
             },
             "__main__": {  # if __name__ == '__main__'
diff --git a/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py b/genai-perf/genai_perf/profile_data_parser/llm_profile_data_parser.py
@@ -36,6 +36,7 @@
 from genai_perf.exceptions import GenAIPerfException
 from genai_perf.logging import logging
 from genai_perf.metrics import LLMMetrics, Statistics
+from genai_perf.profile_data_parser.parser_result import ParserResult
 from genai_perf.profile_data_parser.profile_data_parser import (
     ProfileDataParser,
     ResponseFormat,
@@ -110,6 +111,7 @@ def _parse_profile_data(self, data: dict) -> None:
 
     def _parse_requests(self, requests: dict) -> LLMMetrics:
         """Parse each requests in profile export data to extract key metrics."""
+        parser_result = ParserResult()
         min_req_timestamp, max_res_timestamp = float("inf"), 0
         request_latencies: List[int] = []
         time_to_first_tokens: List[int] = []
@@ -134,7 +136,8 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
             res_timestamps = request["response_timestamps"]
             res_outputs = request["response_outputs"]
 
-            self._preprocess_response(res_timestamps, res_outputs)
+            self._preprocess_response(res_timestamps, res_outputs, parser_result)
+            parser_result.success += len(res_outputs)
 
             # Skip requests with empty response. This happens sometimes when the
             # model returns a single response with empty string.
@@ -253,6 +256,9 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
             goodput_val = self._calculate_goodput(benchmark_duration, llm_metrics)
             llm_metrics.request_goodputs = goodput_val
 
+        # Report parsing results
+        logger.info(parser_result.get_summary())
+
         return llm_metrics
 
     def _calculate_throughput_metrics(
@@ -287,7 +293,10 @@ def _pairwise(self, iterable):
         return zip(iterable, iterable[1:])
 
     def _preprocess_response(
-        self, res_timestamps: List[int], res_outputs: List[Dict[str, str]]
+        self,
+        res_timestamps: List[int],
+        res_outputs: List[Dict[str, str]],
+        parser_result: ParserResult,
     ) -> None:
         """Helper function to preprocess responses of a request."""
         if (
@@ -330,37 +339,54 @@ def _preprocess_response(
                 # Check if any error event occurred.
                 for r in responses:
                     if sse_error_occurred(r):
-                        raise GenAIPerfException(
+                        logger.error(
                             f"Detected an error event in the SSE response: {r}"
                         )
-
-                if len(responses) > 1:
-                    data = load_json_str(remove_sse_prefix(responses[0]))
-                    if self._response_format == ResponseFormat.TRITON_GENERATE:
-                        merged_text = "".join(
-                            [self._extract_generate_text_output(r) for r in responses]
-                        )
-                        data["text_output"] = merged_text
-                    elif self._response_format == ResponseFormat.HUGGINGFACE_GENERATE:
-                        merged_text = "".join(
-                            [
-                                self._extract_huggingface_generate_text_output(r)
-                                for r in responses
-                            ]
-                        )
-                        if isinstance(data, list) and len(data) > 0:
-                            data[0]["generated_text"] = merged_text  # type: ignore
-                    else:
-                        merged_text = "".join(
-                            [self._extract_text_output(r) for r in responses]
-                        )
-                        if self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
-                            data["choices"][0]["text"] = merged_text
+                        res_outputs[i]["response"] = ""
+                        parser_result.failed += 1
+
+                try:
+                    if len(responses) > 1:
+                        data = load_json_str(remove_sse_prefix(responses[0]))
+                        if self._response_format == ResponseFormat.TRITON_GENERATE:
+                            merged_text = "".join(
+                                [
+                                    self._extract_generate_text_output(r)
+                                    for r in responses
+                                ]
+                            )
+                            data["text_output"] = merged_text
+                        elif (
+                            self._response_format == ResponseFormat.HUGGINGFACE_GENERATE
+                        ):
+                            merged_text = "".join(
+                                [
+                                    self._extract_huggingface_generate_text_output(r)
+                                    for r in responses
+                                ]
+                            )
+                            if isinstance(data, list) and len(data) > 0:
+                                data[0]["generated_text"] = merged_text  # type: ignore
                         else:
-                            data["choices"][0]["delta"]["content"] = merged_text
-                    res_outputs[i] = {"response": orjson.dumps(data).decode("utf-8")}
-                elif self._is_empty_response(responses[0]):
+                            merged_text = "".join(
+                                [self._extract_text_output(r) for r in responses]
+                            )
+                            if (
+                                self._response_format
+                                == ResponseFormat.OPENAI_COMPLETIONS
+                            ):
+                                data["choices"][0]["text"] = merged_text
+                            else:
+                                data["choices"][0]["delta"]["content"] = merged_text
+                        res_outputs[i] = {
+                            "response": orjson.dumps(data).decode("utf-8")
+                        }
+                    elif self._is_empty_response(responses[0]):
+                        res_outputs[i]["response"] = ""
+                except Exception as e:
+                    logger.error(f"Error parsing a response: {e}")
                     res_outputs[i]["response"] = ""
+                    parser_result.failed += 1
 
             # Remove responses without any content
             indices_to_remove = []
diff --git a/genai-perf/genai_perf/profile_data_parser/parser_result.py b/genai-perf/genai_perf/profile_data_parser/parser_result.py
@@ -0,0 +1,42 @@
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from dataclasses import dataclass
+
+
+@dataclass
+class ParserResult:
+    """Counters for successfully parsed and failed responses in the profile data."""
+
+    success: int = 0  # responses counted as successful
+    failed: int = 0  # responses counted as failed
+
+    def get_summary(self) -> str:
+        """Return a one-line summary: total, successes (with rate), and failures."""
+        total = self.success + self.failed
+        success_rate = (self.success / total * 100) if total > 0 else 0
+        summary = f"Parsed {total:,} responses: {self.success:,} successful ({success_rate:.1f}%), {self.failed:,} failed"
+        return summary
diff --git a/genai-perf/genai_perf/utils.py b/genai-perf/genai_perf/utils.py
@@ -101,10 +101,13 @@ def load_json_str(json_str: str, func: Callable = lambda x: x) -> Dict[str, Any]
         # notably being stricter on UTF-8 conformance.
         # Refer to https://github.com/ijl/orjson?tab=readme-ov-file#str for details.
         return func(orjson.loads(json_str))
-    except orjson.JSONDecodeError:
+    except orjson.JSONDecodeError as e:
         snippet = json_str[:200] + ("..." if len(json_str) > 200 else "")
-        logger.error("Failed to parse JSON string: '%s'", snippet)
-        raise
+        raise orjson.JSONDecodeError(
+            f"Failed to parse JSON string: '{snippet}'",
+            json_str,
+            e.pos,
+        )
 
 
 def remove_file(file: Path) -> None:
diff --git a/genai-perf/tests/test_data_parser/test_llm_profile_data_parser.py b/genai-perf/tests/test_data_parser/test_llm_profile_data_parser.py
@@ -34,6 +34,7 @@
 from genai_perf.metrics import LLMMetrics
 from genai_perf.metrics.statistics import Statistics
 from genai_perf.profile_data_parser import LLMProfileDataParser
+from genai_perf.profile_data_parser.parser_result import ParserResult
 from genai_perf.profile_data_parser.profile_data_parser import ResponseFormat
 from genai_perf.tokenizer import get_tokenizer
 from tests.test_utils import check_statistics, ns_to_sec
@@ -1018,8 +1019,11 @@ def test_merged_sse_responses(
                 tokenizer=tokenizer,
             )
 
+        parser_result = ParserResult()
         res_timestamps = [i for i in range(len(res_outputs))]
-        pd._preprocess_response(res_timestamps, res_outputs)
+        pd._preprocess_response(res_timestamps, res_outputs, parser_result)
+
+        assert parser_result.failed == 0
         assert res_outputs[0]["response"] == expected_response
 
     @pytest.mark.parametrize(
@@ -1112,8 +1116,10 @@ def test_splintered_sse_responses(
                 tokenizer=tokenizer,
             )
 
+        parser_result = ParserResult()
         res_timestamps = [i for i in range(len(res_outputs))]
-        pd._preprocess_response(res_timestamps, res_outputs)
+        pd._preprocess_response(res_timestamps, res_outputs, parser_result)
+        assert parser_result.failed == 0
 
         assert len(res_outputs) == len(expected_responses)
         for out, expected_response in zip(res_outputs, expected_responses):
@@ -1164,7 +1170,9 @@ def test_handle_non_data_sse_fields(self, mock_json) -> None:
             tokenizer=tokenizer,
         )
 
-        pd._preprocess_response(res_timestamps, res_outputs)
+        parser_result = ParserResult()
+        pd._preprocess_response(res_timestamps, res_outputs, parser_result)
+        assert parser_result.failed == 0
 
         assert len(res_outputs) == 2 and len(res_timestamps) == 2
         assert res_outputs[0]["response"] == expected_responses[0]
@@ -1208,12 +1216,11 @@ def test_handle_sse_error(self, mock_json, res_outputs) -> None:
             tokenizer=tokenizer,
         )
 
-        with pytest.raises(GenAIPerfException) as excinfo:
-            res_timestamps = [i for i in range(len(res_outputs))]
-            pd._preprocess_response(res_timestamps, res_outputs)
+        parser_result = ParserResult()
+        res_timestamps = [i for i in range(len(res_outputs))]
+        pd._preprocess_response(res_timestamps, res_outputs, parser_result)
 
-        expected_error_msg = "Detected an error event in the SSE response: event: error: some error occurred."
-        assert str(excinfo.value) == expected_error_msg
+        assert parser_result.failed == 1
 
     @patch(
         "genai_perf.profile_data_parser.profile_data_parser.load_json",
@@ -1239,7 +1246,10 @@ def test_non_sse_response(self, mock_json) -> None:
             tokenizer=tokenizer,
         )
 
-        pd._preprocess_response(res_timestamps, res_outputs)
+        parser_result = ParserResult()
+        pd._preprocess_response(res_timestamps, res_outputs, parser_result)
+
+        assert parser_result.failed == 0
         assert res_outputs[0]["response"] == expected_response
 
     ###############################