Fix: Resolve Agentic failure loop with improved json parsing (#1541)

ahgraber · web-flow · commit 8deca9231191 · 2024-10-22T09:26:44.000+05:30
Fixes #1538. - Add a final instruction to the prompt for better JSON schema adherence and to reduce "filler" text. - Add an `extract_json` function that identifies JSON by matching pairs of `[]` or `{}`. - Add unit tests for `extract_json`. **Notes:** In my testing, these changes vastly reduce the number of times the 'repair' agent is triggered. However, they do not fix _all_ parse errors: 1. In some cases, the output is valid JSON, but its keys are invalid. 2. In some cases, especially with more complex Pydantic models (e.g., `Themes`, which is composed of `List[Theme]`), the Pydantic JSON schema includes `$defs`. As a result, some LLMs will return `$defs` or include `"$schema": "https://json-schema.org/draft/...`; either of these additional keys will cause the Pydantic parser to fail.
diff --git a/src/ragas/prompt/pydantic_prompt.py b/src/ragas/prompt/pydantic_prompt.py
@@ -16,7 +16,7 @@
 from ragas.exceptions import RagasOutputParserException
 
 from .base import BasePrompt, StringIO, _check_if_language_is_supported
-from .utils import get_all_strings, update_strings
+from .utils import extract_json, get_all_strings, update_strings
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
@@ -82,6 +82,7 @@ def to_string(self, data: t.Optional[InputModel] = None) -> str:
                 if data is not None
                 else "input: (None)\n"
             )
+            + "Respond only with a valid JSON object that complies with the specified schema.\n"
             + "output: "
         )
 
@@ -393,7 +394,8 @@ async def parse_output_string(
     ):
         callbacks = callbacks or []
         try:
-            result = super().parse(output_string)
+            jsonstr = extract_json(output_string)
+            result = super().parse(jsonstr)
         except OutputParserException:
             if max_retries != 0:
                 retry_rm, retry_cb = new_group(
diff --git a/src/ragas/prompt/utils.py b/src/ragas/prompt/utils.py
@@ -64,3 +64,43 @@ def replace_string(s: str) -> str:
         return {k: update_strings(v, old_strings, new_strings) for k, v in obj.items()}
 
     return copy.deepcopy(obj)
+
+
+def extract_json(text: str) -> str:
+    """Identify json from a text blob by matching '[]' or '{}'.
+
+    Warning: This will identify the first json structure!"""
+
+    # check for markdown indicator; if present, start there
+    md_json_idx = text.find("```json")
+    if md_json_idx != -1:
+        text = text[md_json_idx:]
+
+    # search for json delimiter pairs
+    left_bracket_idx = text.find("[")
+    left_brace_idx = text.find("{")
+
+    indices = [idx for idx in (left_bracket_idx, left_brace_idx) if idx != -1]
+    start_idx = min(indices) if indices else None
+
+    # If no delimiter found, return the original text
+    if start_idx is None:
+        return text
+
+    # Identify the exterior delimiters defining JSON
+    open_char = text[start_idx]
+    close_char = "]" if open_char == "[" else "}"
+
+    # Initialize a count to keep track of delimiter pairs
+    count = 0
+    for i, char in enumerate(text[start_idx:], start=start_idx):
+        if char == open_char:
+            count += 1
+        elif char == close_char:
+            count -= 1
+
+        # When count returns to zero, we've found a complete structure
+        if count == 0:
+            return text[start_idx : i + 1]
+
+    return text  # In case of unbalanced JSON, return the original text
diff --git a/tests/unit/prompt/test_prompt_utils.py b/tests/unit/prompt/test_prompt_utils.py
@@ -3,7 +3,7 @@
 import pytest
 from pydantic import BaseModel
 
-from ragas.prompt.utils import get_all_strings, update_strings
+from ragas.prompt.utils import extract_json, get_all_strings, update_strings
 
 
 class Category(BaseModel):
@@ -122,3 +122,93 @@ def test_update_strings(obj, old_strings, new_strings):
 
     assert get_all_strings(updated_obj) == new_strings
     assert get_all_strings(obj) == old_strings
+
+
+class TestExtractJson:
+    prefix = "Here's the generated abstract conceptual question in the requested JSON format: "
+    suffix = "Would you like me to explain in more detail?"
+    object = """{"key": "value"}"""
+    array = """[1, 2, 3]"""
+    nested = """{"outer": {"inner": [1, 2, 3]}}"""
+
+    test_cases = [
+        (object, object),
+        (array, array),
+        (nested, nested),
+        (prefix + object, object),
+        (object + suffix, object),
+        (prefix + object + suffix, object),
+        (prefix + array, array),
+        (array + suffix, array),
+        (prefix + array + suffix, array),
+        (prefix + nested, nested),
+        (nested + suffix, nested),
+        (prefix + nested + suffix, nested),
+        (object + array + nested, object),
+        (nested + object + array, nested),
+    ]
+
+    @pytest.mark.parametrize("text, expected", test_cases)
+    def test_extract_json(self, text, expected):
+        assert extract_json(text) == expected
+
+    def test_extract_empty_array(self):
+        text = "Here is an empty array: [] and some text."
+        expected = "[]"
+        assert extract_json(text) == expected
+
+    def test_extract_empty_object(self):
+        text = "Here is an empty object: {} and more text."
+        expected = "{}"
+        assert extract_json(text) == expected
+
+    def test_extract_incomplete_json(self):
+        text = 'Not complete: {"key": "value", "array": [1, 2, 3'
+        expected = 'Not complete: {"key": "value", "array": [1, 2, 3'
+        assert extract_json(text) == expected
+
+    def test_markdown_json(self):
+        text = """
+        ```python
+        import json
+
+        def modify_query(input_data):
+            query = input_data["query"]
+            style = input_data["style"]
+            length = input_data["length"]
+
+            if style == "Poor grammar":
+                # Poor grammar modifications (simplified for brevity)
+                query = query.replace("How", "how")
+                query = query.replace("do", "does")
+                query = query.replace("terms of", "in terms of")
+                query = query.replace("and", "")
+
+            if length == "long":
+                # Long text modifications (simplified for brevity)
+                query += "?"
+
+            return {
+                "text": query
+            }
+
+        input_data = {
+            "query": "How can the provided commands be used to manage and troubleshoot namespaces in a Kubernetes environment?",
+            "style": "Poor grammar",
+            "length": "long"
+        }
+
+        output = modify_query(input_data)
+        print(json.dumps(output, indent=4))
+        ```
+
+        Output:
+        ```json
+        {"text": "how does the provided commands be used to manage and troubleshoot namespaces in a Kubernetes environment?"}
+        ```
+        This Python function `modify_query` takes an input dictionary with query, style, and length as keys. It applies modifications based on the specified style (Poor grammar) and length (long). The modified query is then returned as a JSON object.
+
+        Note: This implementation is simplified for brevity and may not cover all possible edge cases or nuances of natural language processing.
+        """
+        expected = """{"text": "how does the provided commands be used to manage and troubleshoot namespaces in a Kubernetes environment?"}"""
+        assert extract_json(text) == expected