Add: Json Parsing to solve Hallucination Errors (mem0ai#3013)

akshat1423 · web-flow · commit 2bb0653e679c · 2025-06-23T21:50:16.000+05:30
diff --git a/evaluation/metrics/llm_judge.py b/evaluation/metrics/llm_judge.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 from openai import OpenAI
+from mem0.memory.utils import extract_json
 
 client = OpenAI()
 
@@ -22,7 +23,7 @@
 
 For time related questions, the gold answer will be a specific date, month, year, etc. The generated answer might be much longer or use relative time references (like "last Tuesday" or "next month"), but you should be generous with your grading - as long as it refers to the same date or time period as the gold answer, it should be counted as CORRECT. Even if the format differs (e.g., "May 7th" vs "7 May"), consider it CORRECT if it's the same date.
 
-Now it’s time for the real question:
+Now it's time for the real question:
 Question: {question}
 Gold answer: {gold_answer}
 Generated answer: {generated_answer}
@@ -49,7 +50,7 @@ def evaluate_llm_judge(question, gold_answer, generated_answer):
         response_format={"type": "json_object"},
         temperature=0.0,
     )
-    label = json.loads(response.choices[0].message.content)["label"]
+    label = json.loads(extract_json(response.choices[0].message.content))["label"]
     return 1 if label == "CORRECT" else 0
 
 
diff --git a/mem0/embeddings/aws_bedrock.py b/mem0/embeddings/aws_bedrock.py
@@ -11,6 +11,7 @@
 
 from mem0.configs.embeddings.base import BaseEmbedderConfig
 from mem0.embeddings.base import EmbeddingBase
+from mem0.memory.utils import extract_json
 
 
 class AWSBedrockEmbedding(EmbeddingBase):
@@ -74,7 +75,7 @@ def _get_embedding(self, text):
                 contentType="application/json",
             )
 
-            response_body = json.loads(response.get("body").read())
+            response_body = json.loads(extract_json(response.get("body").read()))
 
             if provider == "cohere":
                 embeddings = response_body.get("embeddings")[0]
diff --git a/mem0/llms/aws_bedrock.py b/mem0/llms/aws_bedrock.py
@@ -10,6 +10,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 PROVIDERS = ["ai21", "amazon", "anthropic", "cohere", "meta", "mistral", "stability", "writer"]
 
@@ -101,7 +102,7 @@ def _parse_response(self, response, tools) -> str:
             return processed_response
 
         response_body = response.get("body").read().decode()
-        response_json = json.loads(response_body)
+        response_json = json.loads(extract_json(response_body))
         return response_json.get("content", [{"text": ""}])[0].get("text", "")
 
     def _prepare_input(
diff --git a/mem0/llms/azure_openai.py b/mem0/llms/azure_openai.py
@@ -6,6 +6,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class AzureOpenAILLM(LLMBase):
@@ -53,7 +54,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/deepseek.py b/mem0/llms/deepseek.py
@@ -6,6 +6,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class DeepSeekLLM(LLMBase):
@@ -41,7 +42,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/groq.py b/mem0/llms/groq.py
@@ -9,6 +9,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class GroqLLM(LLMBase):
@@ -43,7 +44,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/litellm.py b/mem0/llms/litellm.py
@@ -8,6 +8,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class LiteLLM(LLMBase):
@@ -39,7 +40,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/openai.py b/mem0/llms/openai.py
@@ -7,6 +7,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class OpenAILLM(LLMBase):
@@ -62,7 +63,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/together.py b/mem0/llms/together.py
@@ -9,6 +9,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class TogetherLLM(LLMBase):
@@ -43,7 +44,7 @@ def _parse_response(self, response, tools):
                     processed_response["tool_calls"].append(
                         {
                             "name": tool_call.function.name,
-                            "arguments": json.loads(tool_call.function.arguments),
+                            "arguments": json.loads(extract_json(tool_call.function.arguments)),
                         }
                     )
 
diff --git a/mem0/llms/utils/__init__.py b/mem0/llms/utils/__init__.py
diff --git a/mem0/llms/utils/functions.py b/mem0/llms/utils/functions.py
diff --git a/mem0/llms/vllm.py b/mem0/llms/vllm.py
@@ -4,6 +4,7 @@
 
 from mem0.configs.llms.base import BaseLlmConfig
 from mem0.llms.base import LLMBase
+from mem0.memory.utils import extract_json
 
 
 class VllmLLM(LLMBase):
@@ -39,7 +40,7 @@ def _parse_response(self, response, tools):
                 for tool_call in response.choices[0].message.tool_calls:
                     processed_response["tool_calls"].append({
                         "name": tool_call.function.name,
-                        "arguments": json.loads(tool_call.function.arguments),
+                        "arguments": json.loads(extract_json(tool_call.function.arguments)),
                     })
 
             return processed_response
diff --git a/mem0/memory/utils.py b/mem0/memory/utils.py
@@ -46,6 +46,20 @@ def remove_code_blocks(content: str) -> str:
     return match.group(1).strip() if match else content.strip()
 
 
+def extract_json(text):
+    """
+    Return the JSON payload of `text`: the body of the first triple-backtick block (optional
+    'json' tag), or the stripped text if there is no such block. Bytes are decoded as UTF-8.
+    """
+    if isinstance(text, (bytes, bytearray)):
+        text = text.decode("utf-8")  # raw body reads are bytes; a str regex cannot search them
+    text = text.strip()
+    match = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
+    if match:
+        return match.group(1)
+    return text  # no code fence: assume it's raw JSON
+
+
 def get_image_description(image_obj, llm, vision_details):
     """
     Get the description of the image
diff --git a/mem0/vector_stores/azure_ai_search.py b/mem0/vector_stores/azure_ai_search.py
@@ -6,6 +6,7 @@
 from pydantic import BaseModel
 
 from mem0.vector_stores.base import VectorStoreBase
+from mem0.memory.utils import extract_json
 
 try:
     from azure.core.credentials import AzureKeyCredential
@@ -233,7 +234,7 @@ def search(self, query, vectors, limit=5, filters=None):
 
         results = []
         for result in search_results:
-            payload = json.loads(result["payload"])
+            payload = json.loads(extract_json(result["payload"]))
             results.append(OutputData(id=result["id"], score=result["@search.score"], payload=payload))
         return results
 
@@ -288,7 +289,8 @@ def get(self, vector_id) -> OutputData:
             result = self.search_client.get_document(key=vector_id)
         except ResourceNotFoundError:
             return None
-        return OutputData(id=result["id"], score=None, payload=json.loads(result["payload"]))
+        payload = json.loads(extract_json(result["payload"]))
+        return OutputData(id=result["id"], score=None, payload=payload)
 
     def list_cols(self) -> List[str]:
         """
@@ -335,7 +337,7 @@ def list(self, filters=None, limit=100):
         search_results = self.search_client.search(search_text="*", filter=filter_expression, top=limit)
         results = []
         for result in search_results:
-            payload = json.loads(result["payload"])
+            payload = json.loads(extract_json(result["payload"]))
             results.append(OutputData(id=result["id"], score=result["@search.score"], payload=payload))
         return [results]
 
diff --git a/mem0/vector_stores/redis.py b/mem0/vector_stores/redis.py
@@ -12,6 +12,7 @@
 from redisvl.query.filter import Tag
 
 from mem0.vector_stores.base import VectorStoreBase
+from mem0.memory.utils import extract_json
 
 logger = logging.getLogger(__name__)
 
@@ -175,7 +176,7 @@ def search(self, query: str, vectors: list, limit: int = 5, filters: dict = None
                         else {}
                     ),
                     **{field: result[field] for field in ["agent_id", "run_id", "user_id"] if field in result},
-                    **{k: v for k, v in json.loads(result["metadata"]).items()},
+                    **{k: v for k, v in json.loads(extract_json(result["metadata"])).items()},
                 },
             )
             for result in results
@@ -219,7 +220,7 @@ def get(self, vector_id):
                 else {}
             ),
             **{field: result[field] for field in ["agent_id", "run_id", "user_id"] if field in result},
-            **{k: v for k, v in json.loads(result["metadata"]).items()},
+            **{k: v for k, v in json.loads(extract_json(result["metadata"])).items()},
         }
 
         return MemoryResult(id=result["memory_id"], payload=payload)
@@ -286,7 +287,7 @@ def list(self, filters: dict = None, limit: int = None) -> list:
                             for field in ["agent_id", "run_id", "user_id"]
                             if field in result.__dict__
                         },
-                        **{k: v for k, v in json.loads(result["metadata"]).items()},
+                        **{k: v for k, v in json.loads(extract_json(result["metadata"])).items()},
                     },
                 )
                 for result in results.docs

Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,7 @@`
`6`	`6`
`7`	`7`	`from mem0.configs.llms.base import BaseLlmConfig`
`8`	`8`	`from mem0.llms.base import LLMBase`
	`9`	`+from mem0.memory.utils import extract_json`
`9`	`10`
`10`	`11`
`11`	`12`	`class AzureOpenAILLM(LLMBase):`
`@@ -53,7 +54,7 @@ def _parse_response(self, response, tools):`
`53`	`54`	`processed_response["tool_calls"].append(`
`54`	`55`	`{`
`55`	`56`	`"name": tool_call.function.name,`
`56`		`- "arguments": json.loads(tool_call.function.arguments),`
	`57`	`+ "arguments": json.loads(extract_json(tool_call.function.arguments)),`
`57`	`58`	`}`
`58`	`59`	`)`
`59`	`60`
Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@`
`9`	`9`
`10`	`10`	`from mem0.configs.llms.base import BaseLlmConfig`
`11`	`11`	`from mem0.llms.base import LLMBase`
	`12`	`+from mem0.memory.utils import extract_json`
`12`	`13`
`13`	`14`
`14`	`15`	`class GroqLLM(LLMBase):`
`@@ -43,7 +44,7 @@ def _parse_response(self, response, tools):`
`43`	`44`	`processed_response["tool_calls"].append(`
`44`	`45`	`{`
`45`	`46`	`"name": tool_call.function.name,`
`46`		`- "arguments": json.loads(tool_call.function.arguments),`
	`47`	`+ "arguments": json.loads(extract_json(tool_call.function.arguments)),`
`47`	`48`	`}`
`48`	`49`	`)`
`49`	`50`
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@`
`8`	`8`
`9`	`9`	`from mem0.configs.llms.base import BaseLlmConfig`
`10`	`10`	`from mem0.llms.base import LLMBase`
	`11`	`+from mem0.memory.utils import extract_json`
`11`	`12`
`12`	`13`
`13`	`14`	`class LiteLLM(LLMBase):`
`@@ -39,7 +40,7 @@ def _parse_response(self, response, tools):`
`39`	`40`	`processed_response["tool_calls"].append(`
`40`	`41`	`{`
`41`	`42`	`"name": tool_call.function.name,`
`42`		`- "arguments": json.loads(tool_call.function.arguments),`
	`43`	`+ "arguments": json.loads(extract_json(tool_call.function.arguments)),`
`43`	`44`	`}`
`44`	`45`	`)`
`45`	`46`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@`
`7`	`7`
`8`	`8`	`from mem0.configs.llms.base import BaseLlmConfig`
`9`	`9`	`from mem0.llms.base import LLMBase`
	`10`	`+from mem0.memory.utils import extract_json`
`10`	`11`
`11`	`12`
`12`	`13`	`class OpenAILLM(LLMBase):`
`@@ -62,7 +63,7 @@ def _parse_response(self, response, tools):`
`62`	`63`	`processed_response["tool_calls"].append(`
`63`	`64`	`{`
`64`	`65`	`"name": tool_call.function.name,`
`65`		`- "arguments": json.loads(tool_call.function.arguments),`
	`66`	`+ "arguments": json.loads(extract_json(tool_call.function.arguments)),`
`66`	`67`	`}`
`67`	`68`	`)`
`68`	`69`