-
Notifications
You must be signed in to change notification settings - Fork 45
fix: json loads issue parsing LLM responses #254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |||||||||||||||||||||||||||||
| secure_format_prompt, | ||||||||||||||||||||||||||||||
| validate_custom_prompt, | ||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||
| from agent_memory_server.utils.llm_json import parse_llm_json | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
| logger = get_logger(__name__) | ||||||||||||||||||||||||||||||
|
|
@@ -175,7 +176,7 @@ async def extract_memories( | |||||||||||||||||||||||||||||
| response_format={"type": "json_object"}, | ||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||||||||
| response_data = json.loads(response.content) | ||||||||||||||||||||||||||||||
| response_data = parse_llm_json(response.content) | ||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||
| response_data = parse_llm_json(response.content) | |
| response_data = parse_llm_json(response.content) | |
| if not isinstance(response_data, dict): | |
| logger.error( | |
| "Expected JSON object with 'memories' key, got %s: %r", | |
| type(response_data).__name__, | |
| response.content, | |
| ) | |
| # Treat non-object top-level JSON as a decode/validation error | |
| raise json.JSONDecodeError( | |
| "Expected JSON object at top level", | |
| response.content, | |
| 0, | |
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| """Helpers for parsing JSON-shaped LLM responses.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import re | ||
| from collections.abc import Iterator | ||
| from typing import Any | ||
|
|
||
|
|
||
| _CODE_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.IGNORECASE | re.DOTALL) | ||
| _JSON_START_RE = re.compile(r"[{\[]") | ||
|
|
||
|
Comment on lines
+11
to
+13
|
||
|
|
||
def parse_llm_json(content: str) -> Any:
    """Decode JSON from an LLM reply that may be raw, fenced, or buried in prose.

    The whole (stripped) response is decoded first; if that fails, each
    embedded candidate payload is attempted in turn. When nothing parses,
    the decode error from the full-content attempt is re-raised.
    """
    stripped = content.strip()
    decoder = json.JSONDecoder()

    try:
        return decoder.decode(stripped)
    except json.JSONDecodeError as first_error:
        decode_error = first_error

    for payload in _iter_json_candidates(stripped):
        try:
            # raw_decode tolerates trailing text after the JSON value,
            # which is exactly what prose-wrapped candidates look like.
            value, _end = decoder.raw_decode(payload)
        except json.JSONDecodeError:
            continue
        return value

    # No candidate parsed: surface the original failure.
    raise decode_error
|
|
||
|
|
||
| def _iter_json_candidates(content: str) -> Iterator[str]: | ||
| """Yield likely JSON payloads embedded within an LLM response.""" | ||
| seen: set[str] = set() | ||
|
|
||
| for match in _CODE_FENCE_RE.finditer(content): | ||
| candidate = match.group(1).strip() | ||
| if candidate and candidate not in seen: | ||
| seen.add(candidate) | ||
| yield candidate | ||
|
|
||
| # Fall back to scanning for embedded JSON objects/arrays inside prose. | ||
| for match in _JSON_START_RE.finditer(content): | ||
| candidate = content[match.start() :].lstrip() | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential Bug: Hash collision with different whitespace The deduplication logic using # These would be treated as different candidates even though they're the same JSON:
candidate1 = '{"key": "value"}'
candidate2 = '{ "key": "value" }'This could cause:
Suggested improvement: Consider normalizing candidates before deduplication, or use a more robust approach: def _iter_json_candidates(content: str) -> Iterator[str]:
"""Yield likely JSON payloads embedded within an LLM response."""
seen: set[str] = set()
for match in _CODE_FENCE_RE.finditer(content):
candidate = match.group(1).strip()
# Normalize whitespace for deduplication
normalized = ' '.join(candidate.split())
if candidate and normalized not in seen:
seen.add(normalized)
yield candidate
# Fall back to scanning for embedded JSON objects/arrays inside prose.
for match in _JSON_START_RE.finditer(content):
candidate = content[match.start() :].lstrip()
normalized = ' '.join(candidate.split())
if candidate and normalized not in seen:
seen.add(normalized)
yield candidateAlternatively, since you're already using a try/catch for parsing, you might consider removing the deduplication entirely and just rely on finding the first successfully parsed JSON, as the performance cost is likely minimal.
Comment on lines
+46
to
+47
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I'm seeing, it looks like this regex approach would produce a lot of results if your JSON contained a list of dictionaries (or if you had a dictionary that contained lists). >>> p = re.compile(r"[{\[]")
>>> s = '```json\n[{"a": 1}, {"a": 2}, {"a": 3}]\n```'
>>> for match in p.finditer(s):
... print(s[match.start() :].lstrip())
[{"a": 1}, {"a": 2}, {"a": 3}]
\```
{"a": 1}, {"a": 2}, {"a": 3}]
\```
{"a": 2}, {"a": 3}]
\```
{"a": 3}]
\```It probably won't matter since
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch |
||
| if candidate and candidate not in seen: | ||
| seen.add(candidate) | ||
| yield candidate | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Uncaught AttributeError when parser extracts non-dict JSONMedium Severity The Additional Locations (2) |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| """Regression tests for GitHub issue #236.""" | ||
|
|
||
| import json | ||
| from unittest.mock import AsyncMock, Mock, patch | ||
|
|
||
| import pytest | ||
|
|
||
| from agent_memory_server.extraction import extract_entities_llm, extract_topics_llm | ||
| from agent_memory_server.llm import ChatCompletionResponse | ||
| from agent_memory_server.memory_strategies import ( | ||
| CustomMemoryStrategy, | ||
| DiscreteMemoryStrategy, | ||
| SummaryMemoryStrategy, | ||
| UserPreferencesMemoryStrategy, | ||
| ) | ||
| from agent_memory_server.utils.llm_json import parse_llm_json | ||
|
|
||
|
|
||
class TestIssue236LlmJsonParsing:
    """JSON parsing must tolerate markdown fences and surrounding prose."""

    @pytest.mark.parametrize(
        ("content", "expected"),
        [
            (
                '{"entities": ["Redis", "Snowflake"]}',
                {"entities": ["Redis", "Snowflake"]},
            ),
            (
                '```json\n{"entities": ["Redis", "Snowflake"]}\n```',
                {"entities": ["Redis", "Snowflake"]},
            ),
            (
                'Here are the extracted topics:\n```json\n{"topics": ["data engineering", "recommendation engines"]}\n```\nI found these topics in the text.',
                {"topics": ["data engineering", "recommendation engines"]},
            ),
        ],
    )
    def test_parse_llm_json_handles_wrapped_content(self, content, expected):
        """Recover valid JSON from the common wrappers LLMs emit."""
        assert parse_llm_json(content) == expected

    def test_parse_llm_json_raises_for_invalid_content(self):
        """Content carrying no JSON payload must still fail fast."""
        with pytest.raises(json.JSONDecodeError):
            parse_llm_json("This response contains no JSON payload at all.")

    @pytest.mark.asyncio
    @patch("agent_memory_server.extraction.LLMClient.create_chat_completion")
    async def test_extract_entities_llm_parses_fenced_json(self, mock_llm):
        """Entity extraction succeeds when the model fences its JSON."""
        fenced_reply = Mock(
            content='```json\n{"entities": ["Redis", "Snowflake"]}\n```'
        )
        mock_llm.return_value = fenced_reply

        entities = await extract_entities_llm("Redis works with Snowflake.")

        assert set(entities) == {"Redis", "Snowflake"}
        mock_llm.assert_called_once()

    @pytest.mark.asyncio
    @patch("agent_memory_server.extraction.LLMClient.create_chat_completion")
    async def test_extract_topics_llm_parses_prose_wrapped_json(self, mock_llm):
        """Topic extraction succeeds when commentary surrounds the JSON."""
        prose_reply = Mock(
            content='Here are the extracted topics:\n```json\n{"topics": ["data engineering", "recommendation engines", "streaming"]}\n```\nI found these topics in the text.'
        )
        mock_llm.return_value = prose_reply

        topics = await extract_topics_llm(
            "Kafka pipelines support recommendations.", num_topics=2
        )

        assert topics == ["data engineering", "recommendation engines"]
        mock_llm.assert_called_once()
|
|
||
|
|
||
@pytest.mark.asyncio
class TestIssue236MemoryStrategies:
    """Every memory extraction strategy must accept wrapped JSON responses."""

    @pytest.mark.parametrize(
        ("strategy_builder", "response_content"),
        [
            (
                lambda: DiscreteMemoryStrategy(),
                '```json\n{"memories": [{"type": "semantic", "text": "User prefers Redis", "topics": ["preferences"], "entities": ["User", "Redis"], "event_date": null}]}\n```',
            ),
            (
                lambda: SummaryMemoryStrategy(max_summary_length=100),
                'Summary generated below.\n```json\n{"memories": [{"type": "semantic", "text": "User discussed Redis adoption", "topics": ["redis"], "entities": ["User", "Redis"]}]}\n```\nDone.',
            ),
            (
                lambda: UserPreferencesMemoryStrategy(),
                '```json\n{"memories": [{"type": "semantic", "text": "User prefers dark mode", "topics": ["preferences"], "entities": ["User"]}]}\n```',
            ),
            (
                lambda: CustomMemoryStrategy(
                    custom_prompt="Extract memories from: {message}"
                ),
                'Custom extraction result:\n```json\n{"memories": [{"type": "semantic", "text": "User prefers async updates", "topics": ["communication"], "entities": ["User"]}]}\n```',
            ),
        ],
    )
    async def test_strategies_parse_wrapped_json(
        self, strategy_builder, response_content
    ):
        """Each strategy variant parses wrapped JSON without a retry failure."""
        strategy = strategy_builder()

        # Simulate an LLM reply whose JSON is wrapped in fences/prose.
        llm_reply = ChatCompletionResponse(
            content=response_content,
            finish_reason="stop",
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
            model="gpt-4o-mini",
        )

        patch_target = (
            "agent_memory_server.memory_strategies.LLMClient.create_chat_completion"
        )
        with patch(
            patch_target,
            new_callable=AsyncMock,
            return_value=llm_reply,
        ) as mock_create:
            extracted = await strategy.extract_memories("Store this memory.")

        assert len(extracted) == 1
        assert extracted[0]["type"] == "semantic"
        assert extracted[0]["text"].startswith("User")
        mock_create.assert_called_once()


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The exception handler includes
`KeyError`, but `.get("entities", [])` won't raise `KeyError`. The more likely failure mode here (especially with the new lenient parser) is `AttributeError`/`TypeError` if the parsed JSON isn't a dict (e.g., the model returns a top-level list). Catching those and treating them as a parse failure would avoid unexpected retries/crashes.